""" .. _tut-erp-stats: =========================================================== Visualising statistical significance thresholds on EEG data =========================================================== MNE-Python provides a range of tools for statistical hypothesis testing and the visualisation of the results. Here, we show a few options for exploratory and confirmatory tests - e.g., targeted t-tests, cluster-based permutation approaches (here with Threshold-Free Cluster Enhancement); and how to visualise the results. The underlying data comes from :footcite:`DufauEtAl2015`; we contrast long vs. short words. TFCE is described in :footcite:`SmithNichols2009`. """ # Authors: The MNE-Python contributors. # License: BSD-3-Clause # Copyright the MNE-Python contributors. # %% import matplotlib.pyplot as plt import numpy as np from scipy.stats import ttest_ind import mne from mne.channels import find_ch_adjacency, make_1020_channel_selections from mne.stats import spatio_temporal_cluster_test np.random.seed(0) # Load the data path = mne.datasets.kiloword.data_path() / "kword_metadata-epo.fif" epochs = mne.read_epochs(path) # These data are quite smooth, so to speed up processing we'll (unsafely!) just # decimate them epochs.decimate(4, verbose="error") name = "NumberOfLetters" # Split up the data by the median length in letters via the attached metadata median_value = str(epochs.metadata[name].median()) long_words = epochs[name + " > " + median_value] short_words = epochs[name + " < " + median_value] ############################################################################# # If we have a specific point in space and time we wish to test, it can be # convenient to convert the data into Pandas Dataframe format. In this case, # the :class:`mne.Epochs` object has a convenient # :meth:`mne.Epochs.to_data_frame` method, which returns a dataframe. # This dataframe can then be queried for specific time windows and sensors. # The extracted data can be submitted to standard statistical tests. Here, # we conduct t-tests on the difference between long and short words. time_windows = ((0.2, 0.25), (0.35, 0.45)) elecs = ["Fz", "Cz", "Pz"] index = ["condition", "epoch", "time"] # display the EEG data in Pandas format (first 5 rows) print(epochs.to_data_frame(index=index)[elecs].head()) report = "{elec}, time: {tmin}-{tmax} s; t({df})={t_val:.3f}, p={p:.3f}" print("\nTargeted statistical test results:") for tmin, tmax in time_windows: long_df = long_words.copy().crop(tmin, tmax).to_data_frame(index=index) short_df = short_words.copy().crop(tmin, tmax).to_data_frame(index=index) for elec in elecs: # extract data A = long_df[elec].groupby("condition").mean() B = short_df[elec].groupby("condition").mean() # conduct t test t, p = ttest_ind(A, B) # display results format_dict = dict( elec=elec, tmin=tmin, tmax=tmax, df=len(epochs.events) - 2, t_val=t, p=p ) print(report.format(**format_dict)) ############################################################################## # Absent specific hypotheses, we can also conduct an exploratory # mass-univariate analysis at all sensors and time points. This requires # correcting for multiple tests. # MNE offers various methods for this; amongst them, cluster-based permutation # methods allow deriving power from the spatio-temoral correlation structure # of the data. Here, we use TFCE. # Calculate adjacency matrix between sensors from their locations adjacency, _ = find_ch_adjacency(epochs.info, "eeg") # Extract data: transpose because the cluster test requires channels to be last # In this case, inference is done over items. In the same manner, we could # also conduct the test over, e.g., subjects. X = [ long_words.get_data(copy=False).transpose(0, 2, 1), short_words.get_data(copy=False).transpose(0, 2, 1), ] tfce = dict(start=0.4, step=0.4) # ideally start and step would be smaller # Calculate statistical thresholds t_obs, clusters, cluster_pv, h0 = spatio_temporal_cluster_test( X, tfce, adjacency=adjacency, n_permutations=100 ) # a more standard number would be 1000+ significant_points = cluster_pv.reshape(t_obs.shape).T < 0.05 print(str(significant_points.sum()) + " points selected by TFCE ...") ############################################################################## # The results of these mass univariate analyses can be visualised by plotting # :class:`mne.Evoked` objects as images (via :class:`mne.Evoked.plot_image`) # and masking points for significance. # Here, we group channels by Regions of Interest to facilitate localising # effects on the head. # We need an evoked object to plot the image to be masked evoked = mne.combine_evoked( [long_words.average(), short_words.average()], weights=[1, -1] ) # calculate difference wave time_unit = dict(time_unit="s") evoked.plot_joint( title="Long vs. short words", ts_args=time_unit, topomap_args=time_unit ) # show difference wave # Create ROIs by checking channel labels selections = make_1020_channel_selections(evoked.info, midline="12z") # Visualize the results fig, axes = plt.subplots(nrows=3, figsize=(8, 8)) axes = {sel: ax for sel, ax in zip(selections, axes.ravel())} evoked.plot_image( axes=axes, group_by=selections, colorbar=False, show=False, mask=significant_points, show_names="all", titles=None, **time_unit, ) plt.colorbar(axes["Left"].images[-1], ax=list(axes.values()), shrink=0.3, label="µV") plt.show() # %% # References # ---------- # .. footbibliography::