""" .. _tut-reject-data-spans: =================================== Rejecting bad data spans and breaks =================================== This tutorial covers: - manual marking of bad spans of data, - automated rejection of data spans based on signal amplitude, and - automated detection of breaks during an experiment. We begin as always by importing the necessary Python modules and loading some :ref:`example data `; to save memory we'll use a pre-filtered and downsampled version of the example data, and we'll also load an events array to use when converting the continuous data to epochs: """ # Authors: The MNE-Python contributors. # License: BSD-3-Clause # Copyright the MNE-Python contributors. # %% import os import numpy as np import mne sample_data_folder = mne.datasets.sample.data_path() sample_data_raw_file = os.path.join( sample_data_folder, "MEG", "sample", "sample_audvis_filt-0-40_raw.fif" ) raw = mne.io.read_raw_fif(sample_data_raw_file, verbose=False) events_file = os.path.join( sample_data_folder, "MEG", "sample", "sample_audvis_filt-0-40_raw-eve.fif" ) events = mne.read_events(events_file) # %% # Annotating bad spans of data # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # # The tutorial :ref:`tut-events-vs-annotations` describes how # :class:`~mne.Annotations` can be read from embedded events in the raw # recording file, and :ref:`tut-annotate-raw` describes in detail how to # interactively annotate a :class:`~mne.io.Raw` data object. Here, we focus on # best practices for annotating *bad* data spans so that they will be excluded # from your analysis pipeline. # # # The ``reject_by_annotation`` parameter # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # In the interactive ``raw.plot()`` window, the annotation controls can be # opened by pressing :kbd:`a`. Here, new annotation labels can be created or # existing annotation labels can be selected for use. fig = raw.plot() fig.fake_keypress("a") # Simulates user pressing 'a' on the keyboard. # %% # You can see that you need to add a description first to start with # marking spans (Push the button "Add Description" and enter the description). # You can use any description you like, but annotations marking spans that # should be excluded from the analysis pipeline should all begin with "BAD" or # "bad" (e.g., "bad_cough", "bad-eyes-closed", "bad door slamming", etc). When # this practice is followed, many processing steps in MNE-Python will # automatically exclude the "bad"-labelled spans of data; this behavior is # controlled by a parameter ``reject_by_annotation`` that can be found in many # MNE-Python functions or class constructors, including: # # - creation of epoched data from continuous data (:class:`mne.Epochs`) # - many methods of the independent components analysis class # (:class:`mne.preprocessing.ICA`) # - functions for finding heartbeat and blink artifacts # (:func:`~mne.preprocessing.find_ecg_events`, # :func:`~mne.preprocessing.find_eog_events`) # - covariance computations (:func:`mne.compute_raw_covariance`) # - power spectral density computation (:meth:`mne.io.Raw.compute_psd`) # # For example, when creating epochs from continuous data, if # ``reject_by_annotation=True`` the :class:`~mne.Epochs` constructor will drop # any epoch that partially or fully overlaps with an annotated span that begins # with "bad". 
# %%
# Generating annotations programmatically
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# The :ref:`tut-artifact-overview` tutorial introduced the artifact detection
# functions :func:`~mne.preprocessing.find_eog_events` and
# :func:`~mne.preprocessing.find_ecg_events` (although that tutorial mostly
# relied on their higher-level wrappers
# :func:`~mne.preprocessing.create_eog_epochs` and
# :func:`~mne.preprocessing.create_ecg_epochs`). Here, for demonstration
# purposes, we make use of the lower-level artifact detection function to get
# an events array telling us where the blinks are, then automatically add
# "bad_blink" annotations around them (this is not necessary when using
# :func:`~mne.preprocessing.create_eog_epochs`, it is done here just to show
# how annotations are added non-interactively). We'll start the annotations
# 250 ms before the blink and end them 250 ms after it:

# sphinx_gallery_thumbnail_number = 3
eog_events = mne.preprocessing.find_eog_events(raw)
onsets = eog_events[:, 0] / raw.info["sfreq"] - 0.25
durations = [0.5] * len(eog_events)
descriptions = ["bad blink"] * len(eog_events)
blink_annot = mne.Annotations(
    onsets, durations, descriptions, orig_time=raw.info["meas_date"]
)
raw.set_annotations(blink_annot)

# %%
# Now we can confirm that the annotations are centered on the EOG events. Since
# blinks are usually easiest to see in the EEG channels, we'll only plot EEG
# here:

eeg_picks = mne.pick_types(raw.info, meg=False, eeg=True)
raw.plot(events=eog_events, order=eeg_picks)

# %%
# See the section :ref:`tut-section-programmatic-annotations` for more details
# on creating annotations programmatically.
#
# Detecting and annotating breaks
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Another useful function, albeit not related to artifact detection *per se*,
# is `mne.preprocessing.annotate_break`: it will generate annotations for
# segments of the data where no existing annotations (or, alternatively,
# events) can be found. It can therefore be used to automatically detect and
# mark breaks, e.g. between experimental blocks, while the recording
# continued.
#
# For the sake of this example, let's assume an experiment consisting of two
# blocks, the first one stretching from 30 to 90 seconds, and the second from
# 180 to 240 seconds. We'll mark these blocks by annotations, and then use
# `mne.preprocessing.annotate_break` to detect and annotate any breaks.
#
# .. note:: We need to take ``raw.first_time`` into account, otherwise the
#           onsets will be incorrect!

onsets = [raw.first_time + 30, raw.first_time + 180]
durations = [60, 60]
descriptions = ["block_1", "block_2"]

block_annots = mne.Annotations(
    onset=onsets,
    duration=durations,
    description=descriptions,
    orig_time=raw.info["meas_date"],
)
raw.set_annotations(raw.annotations + block_annots)  # add to existing
raw.plot()

# %%
# Now detect break periods. We can control how far the break annotations shall
# expand toward both ends of each break.

break_annots = mne.preprocessing.annotate_break(
    raw=raw,
    min_break_duration=20,  # consider segments of at least 20 s duration
    t_start_after_previous=5,  # start annotation 5 s after end of previous one
    t_stop_before_next=2,  # stop annotation 2 s before beginning of next one
)

raw.set_annotations(raw.annotations + break_annots)  # add to existing
raw.plot()

# %%
# You can see that 3 segments have been annotated as ``BAD_break``:
#
# - the first one starting with the beginning of the recording and ending 2
#   seconds before the beginning of block 1 (due to ``t_stop_before_next=2``),
# - the second one starting 5 seconds after block 1 has ended, and ending 2
#   seconds before the beginning of block 2 (``t_start_after_previous=5``,
#   ``t_stop_before_next=2``),
# - and the last one starting 5 seconds after block 2 has ended
#   (``t_start_after_previous=5``) and continuing until the end of the
#   recording.
#
# You can also see that only the ``block_1`` and ``block_2`` annotations
# were considered in the detection of the break periods – the EOG annotations
# were simply ignored. This is because, by default,
# `~mne.preprocessing.annotate_break` ignores all annotations starting with
# ``'bad'``. You can control this behavior via the ``ignore`` parameter.
#
# It is also possible to perform break period detection based on an array
# of events: simply pass the array via the ``events`` parameter. Existing
# annotations in the raw data will be ignored in this case:

# only keep some button press events (code 32) for this demonstration
events_subset = events[events[:, -1] == 32]
# drop the first and last few events
events_subset = events_subset[3:-3]

break_annots = mne.preprocessing.annotate_break(
    raw=raw,
    events=events_subset,  # passing events will ignore existing annotations
    min_break_duration=25,  # pick a longer break duration this time
)

# replace existing annotations (otherwise it becomes difficult to see any
# effects in the plot!)
raw.set_annotations(break_annots)
raw.plot(events=events_subset)

# %%
# .. _tut-reject-epochs-section:
#
# Rejecting Epochs based on peak-to-peak channel amplitude
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Besides "bad" annotations, the :class:`mne.Epochs` class constructor has
# another means of rejecting epochs, based on signal amplitude thresholds for
# each channel type. In the :ref:`overview tutorial <tut-overview>` we saw an
# example of this: setting maximum acceptable peak-to-peak amplitudes for each
# channel type in an epoch, using the ``reject`` parameter. There is also a
# related parameter, ``flat``, that can be used to set *minimum* acceptable
# peak-to-peak amplitudes for each channel type in an epoch:

reject_criteria = dict(
    mag=3000e-15,  # 3000 fT
    grad=3000e-13,  # 3000 fT/cm
    eeg=100e-6,  # 100 µV
    eog=200e-6,  # 200 µV
)

flat_criteria = dict(mag=1e-15, grad=1e-13, eeg=1e-6)  # 1 fT, 1 fT/cm, 1 µV

# %%
# The values that are appropriate are dataset- and hardware-dependent, so some
# trial-and-error may be necessary to find the correct balance between data
# quality and loss of power due to too many dropped epochs. Here, we've set the
# rejection criteria to be fairly stringent, for illustration purposes.
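#
# One rough way to gauge plausible values (a quick sketch, not a substitute for
# visually inspecting your data; the ``temp_epochs`` and ``ptp_grad`` names are
# just illustrative) is to look at the distribution of peak-to-peak amplitudes
# across provisional epochs, here for the gradiometers:

temp_epochs = mne.Epochs(
    raw, events, tmin=-0.2, tmax=0.5, baseline=None, preload=True
)
# peak-to-peak amplitude per channel and epoch, in T/m
ptp_grad = np.ptp(temp_epochs.get_data(picks="grad"), axis=-1)
# median and upper tail of the distribution; a threshold near or above the
# upper tail would reject only the most extreme epochs
print(np.percentile(ptp_grad, [50, 95, 99.5]))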

# %%
# Two additional parameters, ``reject_tmin`` and ``reject_tmax``, are used to
# set the temporal window in which to calculate peak-to-peak amplitude for the
# purposes of epoch rejection. These default to the same ``tmin`` and ``tmax``
# of the entire epoch. As one example, if you wanted to only apply the
# rejection thresholds to the portion of the epoch that occurs *before* the
# event marker around which the epoch is created, you could set
# ``reject_tmax=0``. A summary of the causes of rejected epochs can be
# generated with the :meth:`~mne.Epochs.plot_drop_log` method:

raw.set_annotations(blink_annot)  # restore the EOG annotations

epochs = mne.Epochs(
    raw,
    events,
    tmin=-0.2,
    tmax=0.5,
    reject_tmax=0,
    reject=reject_criteria,
    flat=flat_criteria,
    reject_by_annotation=False,
    preload=True,
)
epochs.plot_drop_log()

# %%
# Notice that we've passed ``reject_by_annotation=False`` above, in order to
# isolate the effects of the rejection thresholds. If we re-run the epoching
# with ``reject_by_annotation=True`` (the default), we see that the rejections
# due to EEG and EOG channels have disappeared (suggesting that those channel
# fluctuations were probably blink-related, and were subsumed by rejections
# based on the "bad blink" label).

epochs = mne.Epochs(
    raw,
    events,
    tmin=-0.2,
    tmax=0.5,
    reject_tmax=0,
    reject=reject_criteria,
    flat=flat_criteria,
    preload=True,
)
epochs.plot_drop_log()

# %%
# More importantly, note that *many* more epochs are rejected (~12.2% instead
# of ~2.5%) when rejecting based on the blink labels, underscoring why it is
# usually desirable to repair artifacts rather than exclude them.
#
# The :meth:`~mne.Epochs.plot_drop_log` method is a visualization of an
# :class:`~mne.Epochs` attribute, namely ``epochs.drop_log``, which stores
# empty lists for retained epochs and lists of strings for dropped epochs, with
# the strings indicating the reason(s) why the epoch was dropped. For example:

print(epochs.drop_log)

# %%
# Finally, it should be noted that "dropped" epochs are not necessarily deleted
# from the :class:`~mne.Epochs` object right away. Above, we forced the
# dropping to happen when we created the :class:`~mne.Epochs` object by using
# the ``preload=True`` parameter. If we had not done that, the
# :class:`~mne.Epochs` object would have been `memory-mapped`_ (not loaded into
# RAM), in which case the criteria for dropping epochs are stored, and the
# actual dropping happens when the :class:`~mne.Epochs` data are finally loaded
# and used. There are several ways this can get triggered, such as:
#
# - explicitly loading the data into RAM with the :meth:`~mne.Epochs.load_data`
#   method
# - plotting the data (:meth:`~mne.Epochs.plot`,
#   :meth:`~mne.Epochs.plot_image`, etc.)
# - using the :meth:`~mne.Epochs.average` method to create an
#   :class:`~mne.Evoked` object
#
# You can also trigger dropping with the :meth:`~mne.Epochs.drop_bad` method;
# if ``reject`` and/or ``flat`` criteria have already been provided to the
# epochs constructor, :meth:`~mne.Epochs.drop_bad` can be used without
# arguments to simply delete the epochs already marked for removal (if the
# epochs have already been dropped, nothing further will happen):

epochs.drop_bad()

# %%
# Alternatively, if rejection thresholds were not originally given to the
# :class:`~mne.Epochs` constructor, they can be passed to
# :meth:`~mne.Epochs.drop_bad` later instead; this can also be a way of
# imposing progressively more stringent rejection criteria:

stronger_reject_criteria = dict(
    mag=2000e-15,  # 2000 fT
    grad=2000e-13,  # 2000 fT/cm
    eeg=100e-6,  # 100 µV
    eog=100e-6,  # 100 µV
)

epochs.drop_bad(reject=stronger_reject_criteria)
print(epochs.drop_log)
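
# %%
# If you want a quick numerical summary of the drop log, you can also tally the
# reasons with plain Python (a small sketch using only the standard library;
# ``reason_counts`` and ``n_dropped`` are just illustrative names):

from collections import Counter

# each entry of ``epochs.drop_log`` is a (possibly empty) tuple of reasons
reason_counts = Counter(reason for log in epochs.drop_log for reason in log)
n_dropped = sum(len(log) > 0 for log in epochs.drop_log)
print(f"{n_dropped} of {len(epochs.drop_log)} epochs were dropped")
print(reason_counts)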

# %%
# .. _tut-reject-epochs-func-section:
#
# Rejecting Epochs using callables (functions)
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Sometimes it is useful to reject epochs based on criteria other than
# peak-to-peak amplitudes. For example, we might want to reject epochs based on
# the maximum or minimum amplitude of a channel. In this case, the
# :meth:`mne.Epochs.drop_bad` method also accepts callables (functions) in the
# ``reject`` and ``flat`` parameters. This allows us to define functions to
# reject epochs based on our desired criteria.
#
# Let's begin by generating Epoch data with large artifacts in one EEG channel
# in order to demonstrate the versatility of this approach.

raw.crop(0, 5)
raw.del_proj()
chans = raw.info["ch_names"][-5:-1]
raw.pick(chans)
data = raw.get_data()

new_data = data  # get_data() returns a copy, so we can modify it freely
new_data[0, 180:200] *= 1e3  # a short spike in amplitude
new_data[0, 460:580] += 1e-3  # a sustained increase in amplitude
edit_raw = mne.io.RawArray(new_data, raw.info)

# Create fixed length epochs of 1 second
events = mne.make_fixed_length_events(edit_raw, id=1, duration=1.0, start=0)
epochs = mne.Epochs(edit_raw, events, tmin=0, tmax=1, baseline=None)
epochs.plot(scalings=dict(eeg=50e-5))

# %%
# As you can see, we have two large artifacts in the first channel: one large
# spike in amplitude and one sustained increase in amplitude. Let's try to
# reject the epoch containing the spike, based on the maximum amplitude of the
# first channel. Please note that the callable in ``reject`` must return a
# ``(good, reason)`` tuple, where ``good`` must be a bool and ``reason`` must
# be a str, list, or tuple where each entry is a str.

epochs = mne.Epochs(
    edit_raw,
    events,
    tmin=0,
    tmax=1,
    baseline=None,
    preload=True,
)

epochs.drop_bad(
    reject=dict(eeg=lambda x: ((np.max(x, axis=1) > 1e-2).any(), "max amp"))
)
epochs.plot(scalings=dict(eeg=50e-5))

# %%
# Here, the epoch containing the spike in amplitude was rejected for having a
# maximum amplitude greater than 1e-2 volts. Notice the use of the ``any()``
# function to check if *any* of the channels exceeded the threshold. We could
# have also used the ``all()`` function to check if *all* channels exceeded the
# threshold.
#
# Next, let's try to reject the epoch containing the sustained increase in
# amplitude, using the median.

epochs = mne.Epochs(
    edit_raw,
    events,
    tmin=0,
    tmax=1,
    baseline=None,
    preload=True,
)

epochs.drop_bad(
    reject=dict(eeg=lambda x: ((np.median(x, axis=1) > 1e-4).any(), "median amp"))
)
epochs.plot(scalings=dict(eeg=50e-5))

# %%
# Finally, let's try to reject both epochs using a combination of the maximum
# and median. We'll define a custom function and use boolean operators to
# combine the two criteria.


def reject_criteria(x):
    max_condition = np.max(x, axis=1) > 1e-2
    median_condition = np.median(x, axis=1) > 1e-4
    return (
        (max_condition.any() or median_condition.any()),
        ["max amp", "median amp"],
    )


epochs = mne.Epochs(
    edit_raw,
    events,
    tmin=0,
    tmax=1,
    baseline=None,
    preload=True,
)

epochs.drop_bad(reject=dict(eeg=reject_criteria))
epochs.plot(events=True)

# %%
# Note that a complementary Python package, the `autoreject package`_, uses
# machine learning to find optimal rejection criteria, and is designed to
# integrate smoothly with MNE-Python workflows. This can be a considerable
# time-saver when working with heterogeneous datasets.
#
#
# .. LINKS
#
# .. _`memory-mapped`: https://en.wikipedia.org/wiki/Memory-mapped_file
# .. _`autoreject package`: http://autoreject.github.io/
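
# %%
# For reference, a minimal sketch of what using the `autoreject package`_ might
# look like (assuming it is installed; it is a separate package, not part of
# MNE-Python, so the code below is shown here rather than executed):
#
# .. code-block:: python
#
#     from autoreject import AutoReject
#
#     ar = AutoReject()  # estimates per-channel rejection thresholds
#     epochs_clean = ar.fit_transform(epochs)  # repair or drop bad epochs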