{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyzing replicability of functional connectivity-based multivariate BWAS on the Human Connectome Project dataset\n", "\n", "Comprehensive analysis with 52 HCP phenotypes\n", "\n", "## Imports" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2021-08-03T20:04:15.431840Z", "start_time": "2021-08-03T20:04:14.753565Z" } }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sklearn.linear_model import Ridge\n", "from sklearn.svm import SVR\n", "from sklearn.model_selection import KFold, train_test_split\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.decomposition import PCA\n", "from joblib import Parallel, delayed\n", "from mlxtend.evaluate import permutation_test\n", "sns.set(rc={\"figure.figsize\":(4, 2)})\n", "sns.set_style(\"whitegrid\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load HCP data\n", "\n", "We load functional network matrices (netmats) from the HCP1200-release, as published on connectomeDB: https://db.humanconnectome.org/\n", "Due to licensing issues, data is not supplied with the repository, but can be downloaded from the ConnectomeDB or via `get_data.ipynb` (requires credentials).\n", "See [readme.md](readme.md) for more details." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
T1_CountT2_Count3T_RS-fMRI_Count3T_RS-fMRI_PctCompl3T_tMRI_PctComplfMRI_WM_PctComplfMRI_Gamb_PctComplfMRI_Mot_PctComplfMRI_Lang_PctComplfMRI_Soc_PctCompl...Odor_UnadjOdor_AgeAdjPainIntens_RawScorePainInterf_TscoreTaste_UnadjTaste_AgeAdjMars_Log_ScoreMars_ErrsMars_Finalage
count1206.0000001206.0000001206.0000001206.0000001206.0000001206.0000001206.0000001206.0000001206.0000001206.000000...1204.0000001204.0000001201.0000001205.0000001200.0000001200.0000001198.0000001195.0000001195.0000001206.000000
mean1.4784411.4004983.50995087.21326788.21956989.71807689.85273689.63167587.02711487.039801...110.42132197.7275001.44962545.84771895.16698393.9985331.8454670.5815901.82225128.904229
std0.6356880.6282161.21518131.02788629.94216130.38486430.16355930.49209233.56604333.570248...9.10796311.2732511.7830697.67928814.58341214.8378510.5413930.9731720.5428933.570475
min0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...82.74000059.8600000.00000038.60000056.35000059.5000001.5600000.0000001.08000023.500000
25%1.0000001.0000004.000000100.000000100.000000100.000000100.000000100.000000100.000000100.000000...101.12000087.1100000.00000038.60000084.07000083.2200001.8000000.0000001.76000028.000000
50%2.0000001.0000004.000000100.000000100.000000100.000000100.000000100.000000100.000000100.000000...108.79000098.0400001.00000045.90000095.36000094.9700001.8000000.0000001.80000028.000000
75%2.0000002.0000004.000000100.000000100.000000100.000000100.000000100.000000100.000000100.000000...122.250000110.4500002.00000052.200000105.570000102.9200001.8800001.0000001.84000033.000000
max2.0000002.0000004.000000100.000000100.000000100.000000100.000000100.000000100.000000100.000000...122.250000111.41000010.00000075.300000134.650000131.38000015.00000017.00000015.00000036.000000
\n", "

8 rows × 456 columns

\n", "
" ], "text/plain": [ " T1_Count T2_Count 3T_RS-fMRI_Count 3T_RS-fMRI_PctCompl \\\n", "count 1206.000000 1206.000000 1206.000000 1206.000000 \n", "mean 1.478441 1.400498 3.509950 87.213267 \n", "std 0.635688 0.628216 1.215181 31.027886 \n", "min 0.000000 0.000000 0.000000 0.000000 \n", "25% 1.000000 1.000000 4.000000 100.000000 \n", "50% 2.000000 1.000000 4.000000 100.000000 \n", "75% 2.000000 2.000000 4.000000 100.000000 \n", "max 2.000000 2.000000 4.000000 100.000000 \n", "\n", " 3T_tMRI_PctCompl fMRI_WM_PctCompl fMRI_Gamb_PctCompl \\\n", "count 1206.000000 1206.000000 1206.000000 \n", "mean 88.219569 89.718076 89.852736 \n", "std 29.942161 30.384864 30.163559 \n", "min 0.000000 0.000000 0.000000 \n", "25% 100.000000 100.000000 100.000000 \n", "50% 100.000000 100.000000 100.000000 \n", "75% 100.000000 100.000000 100.000000 \n", "max 100.000000 100.000000 100.000000 \n", "\n", " fMRI_Mot_PctCompl fMRI_Lang_PctCompl fMRI_Soc_PctCompl ... \\\n", "count 1206.000000 1206.000000 1206.000000 ... \n", "mean 89.631675 87.027114 87.039801 ... \n", "std 30.492092 33.566043 33.570248 ... \n", "min 0.000000 0.000000 0.000000 ... \n", "25% 100.000000 100.000000 100.000000 ... \n", "50% 100.000000 100.000000 100.000000 ... \n", "75% 100.000000 100.000000 100.000000 ... \n", "max 100.000000 100.000000 100.000000 ... \n", "\n", " Odor_Unadj Odor_AgeAdj PainIntens_RawScore PainInterf_Tscore \\\n", "count 1204.000000 1204.000000 1201.000000 1205.000000 \n", "mean 110.421321 97.727500 1.449625 45.847718 \n", "std 9.107963 11.273251 1.783069 7.679288 \n", "min 82.740000 59.860000 0.000000 38.600000 \n", "25% 101.120000 87.110000 0.000000 38.600000 \n", "50% 108.790000 98.040000 1.000000 45.900000 \n", "75% 122.250000 110.450000 2.000000 52.200000 \n", "max 122.250000 111.410000 10.000000 75.300000 \n", "\n", " Taste_Unadj Taste_AgeAdj Mars_Log_Score Mars_Errs Mars_Final \\\n", "count 1200.000000 1200.000000 1198.000000 1195.000000 1195.000000 \n", "mean 95.166983 93.998533 1.845467 0.581590 1.822251 \n", "std 14.583412 14.837851 0.541393 0.973172 0.542893 \n", "min 56.350000 59.500000 1.560000 0.000000 1.080000 \n", "25% 84.070000 83.220000 1.800000 0.000000 1.760000 \n", "50% 95.360000 94.970000 1.800000 0.000000 1.800000 \n", "75% 105.570000 102.920000 1.880000 1.000000 1.840000 \n", "max 134.650000 131.380000 15.000000 17.000000 15.000000 \n", "\n", " age \n", "count 1206.000000 \n", "mean 28.904229 \n", "std 3.570475 \n", "min 23.500000 \n", "25% 28.000000 \n", "50% 28.000000 \n", "75% 33.000000 \n", "max 36.000000 \n", "\n", "[8 rows x 456 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# HCP data can be obtained from the connectomeDB\n", "# data is not part of this repository\n", "subjectIDs = pd.read_csv('hcp_data/subjectIDs.txt', header=None)\n", "\n", "netmats_pearson = pd.read_csv('hcp_data/netmats1_correlationZ.txt',\n", " sep=' ',\n", " header=None)\n", "netmats_pearson['ID'] = subjectIDs[0]\n", "netmats_pearson.set_index('ID', drop=True, inplace=True)\n", "\n", "\n", "netmats_parcor = pd.read_csv('hcp_data/netmats2_partial-correlation.txt',\n", " sep=' ',\n", " header=None)\n", "netmats_parcor['ID'] = subjectIDs[0]\n", "netmats_parcor.set_index('ID', drop=True, inplace=True)\n", "\n", "behavior = pd.read_csv('hcp_data/hcp1200_behavioral_data.csv')\n", "behavior = behavior.set_index('Subject', drop=True)\n", "\n", "# convert age to numeric\n", "age = []\n", "for s in behavior['Age']:\n", " if s == '36+':\n", " age.append(36)\n", " else:\n", " split = s.split(sep='-')\n", " age.append(np.mean((float(split[0]), float(split[1]))))\n", "\n", "behavior['age'] = age\n", "behavior.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Helper function to prepare target variable\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def create_data(target='CogTotalComp_AgeAdj', feature_data=netmats_parcor):\n", " # it's a good practice to use pandas for merging, messing up subject order can be painful\n", " features = feature_data.columns\n", " df = behavior\n", " df = df.merge(feature_data, left_index=True, right_index=True, how='left')\n", "\n", " df = df.dropna(subset = [target] + features.values.tolist())\n", " y = df[target].values\n", " X = df[features].values\n", " return X, y" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Helper function implementing a single bootstrap iteration\n", "\n", "We define a workhorse function which:\n", "- randomly samples the discovery and the replication datasets,\n", "- creates cross-validated estimates of predictive performance within the discovery sample\n", "- finalizes the model by fitting it to the whole discovery sample (overfits the discovery but not the replication sample)\n", "- use it to predict the replication sample" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def corr(X, Y):\n", " # just a small wrapper function (pandas correlation is silent in \"unlucky\" bootstraps with constant values)\n", " return pd.Series(X).corr( pd.Series(Y))\n", "\n", "def bootstrap_workhorse(X, y, sample_size, model, random_state, shuffle_y=False):\n", "\n", " #create discovery and replication samples by random sampling from the whole dataset (without replacement)\n", "\n", " # if shuffle_y is true, a null model is created bz permuting y\n", " if shuffle_y:\n", " rng = np.random.default_rng(random_state)\n", " y = rng.permutation(y)\n", "\n", " # sample the discovery and replication sets *without replacement* (with replacement introduces spurious dependencies)\n", " X_discovery, X_replication, y_discovery, y_replication = train_test_split(X, y, train_size=sample_size, test_size=sample_size, shuffle=True, random_state=random_state)\n", "\n", " # standard 10-fold cross-validation\n", " cv = KFold(10)\n", "\n", " # below we obtain cross-validated predictions in the discovery sample\n", " predicted_discovery_cv = np.zeros_like(y_discovery) # here we collect the predictions for each fold\n", " cor_per_fold = np.zeros(cv.n_splits) # here we collect the predictive performance in each fold\n", " i = 0 # just a counter\n", " for train, test in cv.split(X=X_discovery, y=y_discovery): # loop to leave one fold out\n", " model.fit(X=X_discovery[train], y=y_discovery[train]) # fit model to the training set\n", " predicted_discovery_cv[test] = model.predict(X=X_discovery[test]) # use fitted model to predict teh test set\n", " cor_per_fold[i] = corr(y_discovery[test], predicted_discovery_cv[test]) # calculate performance on tne test set\n", " i += 1\n", " # calculate mean test performance across all folds\n", " r_disc_cv = np.mean(cor_per_fold)\n", " # 'finalize' model by training it on the full discovery sample (without cross-validation)\n", " final_model = model.fit(X=X_discovery, y=y_discovery)\n", " # obtain predictions with the final model on the discovery sample, note that this model actually overfits this sample.\n", " # we do this only to demonstrate biased estimates\n", " predicted_discovery_overfit = final_model.predict(X=X_discovery)\n", " # here we obtain the biased effect size (r) estimates for demonstrational purposes\n", " r_disc_overfit = corr(predicted_discovery_overfit, y_discovery)\n", "\n", " # We use the final model to predict the replication sample\n", " # This is correct (no overfitting here), the final model did not see this data during training\n", " predicted_replication = final_model.predict(X=X_replication)\n", " # we obtain the out-of-sample prediction performance estimates\n", " r_rep = corr(predicted_replication, y_replication)\n", "\n", " # below we calculate permutation-based p-values for all three effect size estimates (in-sample unbiased, in-sample biased, out-of-sample)\n", " # (one sided tests, testing for positive correlation)\n", " p_disc_cv = permutation_test(predicted_discovery_cv, y_discovery, method='approximate', num_rounds=1000, func=lambda x, y: corr(x, y),seed=random_state)\n", " p_disc_overfit = permutation_test(predicted_discovery_overfit, y_discovery, method='approximate', num_rounds=1000, func=lambda x, y: corr(x, y),seed=random_state)\n", " p_rep = permutation_test(predicted_replication, y_replication, method='approximate', num_rounds=1000, func=lambda x, y: corr(x, y),seed=random_state)\n", " # return results\n", " return r_disc_cv, r_disc_overfit, r_rep, p_disc_cv, p_disc_overfit, p_rep" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "All set, now we start the analysis." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Replicability with sample sizes n=50, 100, 200, 300 and max\n", "Here we train a few different models on 100 bootstrap samples.\n", "\n", "We aggregate the results of our workhorse function in `n_bootstrap`=100 bootstrap cases (run in parallel).\n", "\n", "The whole process is repeated for all sample sizes, fetaure_sets and target variables.\n", "\n", "## Here we test all 33 variables, proposed by Marek et al. in their response." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "targets = [\n", "#demographics\n", " 'age',\n", "# cognition\n", " 'MMSE_Score',\n", " 'PSQI_Score',\n", " 'PicSeq_AgeAdj',\n", " 'CardSort_AgeAdj',\n", " 'Flanker_AgeAdj',\n", " 'PMAT24_A_CR',\n", " 'CogFluidComp_AgeAdj',\n", " 'CogCrystalComp_AgeAdj',\n", " 'CogEarlyComp_AgeAdj',\n", " 'CogTotalComp_AgeAdj',\n", " 'ReadEng_AgeAdj',\n", " 'PicVocab_AgeAdj',\n", " 'ProcSpeed_AgeAdj',\n", " 'DDisc_AUC_200',\n", " 'VSPLOT_TC',\n", " 'SCPT_SEN',\n", " 'SCPT_SPEC',\n", "# memory\n", " 'IWRD_TOT',\n", " 'ListSort_AgeAdj',\n", "# emotion\n", " 'ER40ANG',\n", " 'ER40FEAR',\n", " 'ER40HAP',\n", " 'ER40NOE',\n", " 'ER40SAD',\n", " 'AngAffect_Unadj',\n", " 'AngHostil_Unadj',\n", " 'AngAggr_Unadj',\n", " 'FearAffect_Unadj',\n", " 'FearSomat_Unadj',\n", " 'FearSomat_Unadj',\n", " 'Sadness_Unadj',\n", " 'LifeSatisf_Unadj',\n", " 'MeanPurp_Unadj',\n", " 'PosAffect_Unadj',\n", " 'Friendship_Unadj',\n", " 'Loneliness_Unadj',\n", " 'PercHostil_Unadj',\n", " 'PercReject_Unadj',\n", " 'EmotSupp_Unadj',\n", " 'InstruSupp_Unadj',\n", " 'PercStress_Unadj',\n", " 'SelfEff_Unadj',\n", " 'Dexterity_AgeAdj',\n", " # personality\n", " 'NEOFAC_A',\n", " 'NEOFAC_O',\n", " 'NEOFAC_C',\n", " 'NEOFAC_N',\n", " 'NEOFAC_E',\n", "# sensory\n", " 'Noise_Comp',\n", " 'Odor_AgeAdj',\n", " 'PainInterf_Tscore',\n", " 'Taste_AgeAdj',\n", " 'Mars_Final'\n", "]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Now we fit a simple Ridge regression\n", "(no feature selection, no hyperparameter optimization)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "*****************************************************************\n", "netmats_parcor ridge age max\n", "r discovery (with cv) : 0.48020846359551633 r replication: 0.48967065593640496\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 100.0 %\n", "*****************************************************************\n", "netmats_parcor ridge MMSE_Score max\n", "r discovery (with cv) : 0.11235242553787785 r replication: 0.12970357972616348\n", "Replicability at alpha = 0.05 : 89.87341772151899 %\n", "Replicability at alpha = 0.01 : 40.50632911392405 %\n", "Replicability at alpha = 0.005 : 25.31645569620253 %\n", "Replicability at alpha = 0.001 : 2.5316455696202533 %\n", "*****************************************************************\n", "netmats_parcor ridge PSQI_Score max\n", "r discovery (with cv) : 0.14291306988028807 r replication: 0.14519995923911966\n", "Replicability at alpha = 0.05 : 98.85057471264368 %\n", "Replicability at alpha = 0.01 : 72.41379310344827 %\n", "Replicability at alpha = 0.005 : 47.12643678160919 %\n", "Replicability at alpha = 0.001 : 16.091954022988507 %\n", "*****************************************************************\n", "netmats_parcor ridge PicSeq_AgeAdj max\n", "r discovery (with cv) : 0.18233069324505247 r replication: 0.18593093519593903\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 93.87755102040816 %\n", "Replicability at alpha = 0.005 : 90.81632653061224 %\n", "Replicability at alpha = 0.001 : 66.3265306122449 %\n", "*****************************************************************\n", "netmats_parcor ridge CardSort_AgeAdj max\n", "r discovery (with cv) : 0.1789658621130238 r replication: 0.18957693758366514\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 96.90721649484536 %\n", "Replicability at alpha = 0.005 : 91.75257731958763 %\n", "Replicability at alpha = 0.001 : 65.97938144329896 %\n", "*****************************************************************\n", "netmats_parcor ridge Flanker_AgeAdj max\n", "r discovery (with cv) : 0.1322612349491906 r replication: 0.14022631939127772\n", "Replicability at alpha = 0.05 : 98.87640449438202 %\n", "Replicability at alpha = 0.01 : 61.79775280898876 %\n", "Replicability at alpha = 0.005 : 42.69662921348314 %\n", "Replicability at alpha = 0.001 : 12.359550561797752 %\n", "*****************************************************************\n", "netmats_parcor ridge PMAT24_A_CR max\n", "r discovery (with cv) : 0.2922426473940083 r replication: 0.301492004778787\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 100.0 %\n", "*****************************************************************\n", "netmats_parcor ridge CogFluidComp_AgeAdj max\n", "r discovery (with cv) : 0.3049801718197025 r replication: 0.2996751646332102\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 100.0 %\n", "*****************************************************************\n", "netmats_parcor ridge CogCrystalComp_AgeAdj max\n", "r discovery (with cv) : 0.509356447724508 r replication: 0.5252483106948896\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 100.0 %\n", "*****************************************************************\n", "netmats_parcor ridge CogEarlyComp_AgeAdj max\n", "r discovery (with cv) : 0.35010656810589025 r replication: 0.346212766761039\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 100.0 %\n", "*****************************************************************\n", "netmats_parcor ridge CogTotalComp_AgeAdj max\n", "r discovery (with cv) : 0.4726518883300038 r replication: 0.47903946590450225\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 100.0 %\n", "*****************************************************************\n", "netmats_parcor ridge ReadEng_AgeAdj max\n", "r discovery (with cv) : 0.450867003576911 r replication: 0.46022188861700536\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 100.0 %\n", "*****************************************************************\n", "netmats_parcor ridge PicVocab_AgeAdj max\n", "r discovery (with cv) : 0.4748017798587243 r replication: 0.4784423901738578\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 100.0 %\n", "*****************************************************************\n", "netmats_parcor ridge ProcSpeed_AgeAdj max\n", "r discovery (with cv) : 0.11764068520351804 r replication: 0.12249903623335612\n", "Replicability at alpha = 0.05 : 84.61538461538461 %\n", "Replicability at alpha = 0.01 : 38.46153846153847 %\n", "Replicability at alpha = 0.005 : 21.794871794871796 %\n", "Replicability at alpha = 0.001 : 2.564102564102564 %\n", "*****************************************************************\n", "netmats_parcor ridge DDisc_AUC_200 max\n", "r discovery (with cv) : 0.20884491911824618 r replication: 0.20706209788397087\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 99.0 %\n", "Replicability at alpha = 0.005 : 95.0 %\n", "Replicability at alpha = 0.001 : 83.0 %\n", "*****************************************************************\n", "netmats_parcor ridge VSPLOT_TC max\n", "r discovery (with cv) : 0.33041854229644096 r replication: 0.3361466183063388\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 100.0 %\n", "*****************************************************************\n", "netmats_parcor ridge SCPT_SEN max\n", "r discovery (with cv) : 0.004953219542766921 r replication: 0.005954925720074722\n", "Replicability at alpha = 0.05 : 0.0 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge SCPT_SPEC max\n", "r discovery (with cv) : 0.1483580336589026 r replication: 0.15543127763406267\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 78.02197802197803 %\n", "Replicability at alpha = 0.005 : 57.14285714285714 %\n", "Replicability at alpha = 0.001 : 26.373626373626376 %\n", "*****************************************************************\n", "netmats_parcor ridge IWRD_TOT max\n", "r discovery (with cv) : 0.0810183743864671 r replication: 0.0739403812147235\n", "Replicability at alpha = 0.05 : 36.53846153846153 %\n", "Replicability at alpha = 0.01 : 3.8461538461538463 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge ListSort_AgeAdj max\n", "r discovery (with cv) : 0.27262179944410553 r replication: 0.27629338784789925\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 100.0 %\n", "*****************************************************************\n", "netmats_parcor ridge ER40ANG max\n", "r discovery (with cv) : 0.07290919946421724 r replication: 0.07930980325318832\n", "Replicability at alpha = 0.05 : 44.680851063829785 %\n", "Replicability at alpha = 0.01 : 2.127659574468085 %\n", "Replicability at alpha = 0.005 : 2.127659574468085 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge ER40FEAR max\n", "r discovery (with cv) : 0.033788344197422604 r replication: 0.03217012035635588\n", "Replicability at alpha = 0.05 : 0.0 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge ER40HAP max\n", "r discovery (with cv) : -0.05910616789376827 r replication: -0.040972411502727554\n", "Replicability at alpha = 0.05 : nan %\n", "Replicability at alpha = 0.01 : nan %\n", "Replicability at alpha = 0.005 : nan %\n", "Replicability at alpha = 0.001 : nan %\n", "*****************************************************************\n", "netmats_parcor ridge ER40NOE max\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ ":58: RuntimeWarning: invalid value encountered in long_scalars\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "r discovery (with cv) : 0.05015028148085295 r replication: 0.05477097859212915\n", "Replicability at alpha = 0.05 : 16.666666666666664 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge ER40SAD max\n", "r discovery (with cv) : 0.03355879047628217 r replication: 0.043958511763540556\n", "Replicability at alpha = 0.05 : 10.0 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge AngAffect_Unadj max\n", "r discovery (with cv) : 0.09675041204325666 r replication: 0.097384559509148\n", "Replicability at alpha = 0.05 : 69.23076923076923 %\n", "Replicability at alpha = 0.01 : 12.307692307692308 %\n", "Replicability at alpha = 0.005 : 1.5384615384615385 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge AngHostil_Unadj max\n", "r discovery (with cv) : 0.06336555348912118 r replication: 0.07505183534895289\n", "Replicability at alpha = 0.05 : 41.02564102564102 %\n", "Replicability at alpha = 0.01 : 2.564102564102564 %\n", "Replicability at alpha = 0.005 : 2.564102564102564 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge AngAggr_Unadj max\n", "r discovery (with cv) : 0.19405555852511736 r replication: 0.20052370789491036\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 95.0 %\n", "Replicability at alpha = 0.005 : 93.0 %\n", "Replicability at alpha = 0.001 : 77.0 %\n", "*****************************************************************\n", "netmats_parcor ridge FearAffect_Unadj max\n", "r discovery (with cv) : 0.15563217224852452 r replication: 0.15122577808620358\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 77.01149425287356 %\n", "Replicability at alpha = 0.005 : 66.66666666666666 %\n", "Replicability at alpha = 0.001 : 29.88505747126437 %\n", "*****************************************************************\n", "netmats_parcor ridge FearSomat_Unadj max\n", "r discovery (with cv) : 0.061101136887113376 r replication: 0.054246596855310567\n", "Replicability at alpha = 0.05 : 14.285714285714285 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge FearSomat_Unadj max\n", "r discovery (with cv) : 0.061101136887113376 r replication: 0.054246596855310567\n", "Replicability at alpha = 0.05 : 14.285714285714285 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge Sadness_Unadj max\n", "r discovery (with cv) : 0.1327824232969155 r replication: 0.1244556654760563\n", "Replicability at alpha = 0.05 : 94.04761904761905 %\n", "Replicability at alpha = 0.01 : 54.761904761904766 %\n", "Replicability at alpha = 0.005 : 35.714285714285715 %\n", "Replicability at alpha = 0.001 : 7.142857142857142 %\n", "*****************************************************************\n", "netmats_parcor ridge LifeSatisf_Unadj max\n", "r discovery (with cv) : 0.19873341230892952 r replication: 0.18440610179827896\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 95.87628865979381 %\n", "Replicability at alpha = 0.005 : 92.78350515463917 %\n", "Replicability at alpha = 0.001 : 67.0103092783505 %\n", "*****************************************************************\n", "netmats_parcor ridge MeanPurp_Unadj max\n", "r discovery (with cv) : 0.09984748458857128 r replication: 0.08905980179359992\n", "Replicability at alpha = 0.05 : 50.76923076923077 %\n", "Replicability at alpha = 0.01 : 15.384615384615385 %\n", "Replicability at alpha = 0.005 : 4.615384615384616 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge PosAffect_Unadj max\n", "r discovery (with cv) : 0.09883821052746254 r replication: 0.08771575790081709\n", "Replicability at alpha = 0.05 : 55.223880597014926 %\n", "Replicability at alpha = 0.01 : 10.44776119402985 %\n", "Replicability at alpha = 0.005 : 5.970149253731343 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge Friendship_Unadj max\n", "r discovery (with cv) : 0.08531254415798344 r replication: 0.08428001338641648\n", "Replicability at alpha = 0.05 : 44.0 %\n", "Replicability at alpha = 0.01 : 10.0 %\n", "Replicability at alpha = 0.005 : 4.0 %\n", "Replicability at alpha = 0.001 : 2.0 %\n", "*****************************************************************\n", "netmats_parcor ridge Loneliness_Unadj max\n", "r discovery (with cv) : 0.10181181257724871 r replication: 0.10338556615021999\n", "Replicability at alpha = 0.05 : 71.66666666666667 %\n", "Replicability at alpha = 0.01 : 26.666666666666668 %\n", "Replicability at alpha = 0.005 : 6.666666666666667 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge PercHostil_Unadj max\n", "r discovery (with cv) : 0.037622230965088856 r replication: 0.04500642736764501\n", "Replicability at alpha = 0.05 : 0.0 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge PercReject_Unadj max\n", "r discovery (with cv) : 0.03688725729680184 r replication: 0.03543635186966549\n", "Replicability at alpha = 0.05 : 4.3478260869565215 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge EmotSupp_Unadj max\n", "r discovery (with cv) : 0.1260287186553436 r replication: 0.12226413039802284\n", "Replicability at alpha = 0.05 : 96.55172413793103 %\n", "Replicability at alpha = 0.01 : 43.67816091954023 %\n", "Replicability at alpha = 0.005 : 28.735632183908045 %\n", "Replicability at alpha = 0.001 : 9.195402298850574 %\n", "*****************************************************************\n", "netmats_parcor ridge InstruSupp_Unadj max\n", "r discovery (with cv) : 0.04191974313059748 r replication: 0.03262089935367679\n", "Replicability at alpha = 0.05 : 4.166666666666666 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge PercStress_Unadj max\n", "r discovery (with cv) : 0.10021102140816969 r replication: 0.09269067112373008\n", "Replicability at alpha = 0.05 : 62.5 %\n", "Replicability at alpha = 0.01 : 14.0625 %\n", "Replicability at alpha = 0.005 : 6.25 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge SelfEff_Unadj max\n", "r discovery (with cv) : 0.09990355623342788 r replication: 0.10774220880935355\n", "Replicability at alpha = 0.05 : 81.81818181818183 %\n", "Replicability at alpha = 0.01 : 28.78787878787879 %\n", "Replicability at alpha = 0.005 : 12.121212121212121 %\n", "Replicability at alpha = 0.001 : 1.5151515151515151 %\n", "*****************************************************************\n", "netmats_parcor ridge Dexterity_AgeAdj max\n", "r discovery (with cv) : 0.1698377324671013 r replication: 0.16739913235673143\n", "Replicability at alpha = 0.05 : 98.9247311827957 %\n", "Replicability at alpha = 0.01 : 88.17204301075269 %\n", "Replicability at alpha = 0.005 : 81.72043010752688 %\n", "Replicability at alpha = 0.001 : 51.61290322580645 %\n", "*****************************************************************\n", "netmats_parcor ridge NEOFAC_A max\n", "r discovery (with cv) : 0.19226360180293334 r replication: 0.2105816660368088\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 97.95918367346938 %\n", "Replicability at alpha = 0.005 : 94.89795918367348 %\n", "Replicability at alpha = 0.001 : 82.6530612244898 %\n", "*****************************************************************\n", "netmats_parcor ridge NEOFAC_O max\n", "r discovery (with cv) : 0.26505077367461527 r replication: 0.28578623242498713\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 100.0 %\n", "Replicability at alpha = 0.005 : 100.0 %\n", "Replicability at alpha = 0.001 : 98.0 %\n", "*****************************************************************\n", "netmats_parcor ridge NEOFAC_C max\n", "r discovery (with cv) : 0.17973714484091477 r replication: 0.19354830382121097\n", "Replicability at alpha = 0.05 : 100.0 %\n", "Replicability at alpha = 0.01 : 92.78350515463917 %\n", "Replicability at alpha = 0.005 : 90.72164948453609 %\n", "Replicability at alpha = 0.001 : 64.94845360824742 %\n", "*****************************************************************\n", "netmats_parcor ridge NEOFAC_N max\n", "r discovery (with cv) : 0.07314281912604746 r replication: 0.06696149072259926\n", "Replicability at alpha = 0.05 : 40.0 %\n", "Replicability at alpha = 0.01 : 2.0 %\n", "Replicability at alpha = 0.005 : 2.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge NEOFAC_E max\n", "r discovery (with cv) : 0.15791961819822467 r replication: 0.16447031944134946\n", "Replicability at alpha = 0.05 : 98.93617021276596 %\n", "Replicability at alpha = 0.01 : 84.04255319148936 %\n", "Replicability at alpha = 0.005 : 70.2127659574468 %\n", "Replicability at alpha = 0.001 : 35.1063829787234 %\n", "*****************************************************************\n", "netmats_parcor ridge Noise_Comp max\n", "r discovery (with cv) : 0.13754389395248953 r replication: 0.13277944361089927\n", "Replicability at alpha = 0.05 : 97.67441860465115 %\n", "Replicability at alpha = 0.01 : 62.7906976744186 %\n", "Replicability at alpha = 0.005 : 46.51162790697674 %\n", "Replicability at alpha = 0.001 : 10.465116279069768 %\n", "*****************************************************************\n", "netmats_parcor ridge Odor_AgeAdj max\n", "r discovery (with cv) : 0.03422255970866912 r replication: 0.019033276660184433\n", "Replicability at alpha = 0.05 : 0.0 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge PainInterf_Tscore max\n", "r discovery (with cv) : 0.03274152032524882 r replication: 0.03329128578051491\n", "Replicability at alpha = 0.05 : 0.0 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge Taste_AgeAdj max\n", "r discovery (with cv) : 0.0944194746807131 r replication: 0.08063900075841376\n", "Replicability at alpha = 0.05 : 43.103448275862064 %\n", "Replicability at alpha = 0.01 : 5.172413793103448 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "*****************************************************************\n", "netmats_parcor ridge Mars_Final max\n", "r discovery (with cv) : 0.0815295729161019 r replication: 0.02266341772170401\n", "Replicability at alpha = 0.05 : 6.666666666666667 %\n", "Replicability at alpha = 0.01 : 0.0 %\n", "Replicability at alpha = 0.005 : 0.0 %\n", "Replicability at alpha = 0.001 : 0.0 %\n", "CPU times: user 38 s, sys: 5.9 s, total: 43.9 s\n", "Wall time: 44min 5s\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
connectivitymodeltargetnr_discovery_cvr_discovery_overfitr_replicationp_discovery_cvp_discovery_overfitp_replication
0netmats_parcorridgeage5010.4806791.00.5147600.0009990.0009990.000999
1netmats_parcorridgeage5010.4772721.00.4550210.0009990.0009990.000999
2netmats_parcorridgeage5010.5286411.00.4826120.0009990.0009990.000999
3netmats_parcorridgeage5010.4888561.00.5028990.0009990.0009990.000999
4netmats_parcorridgeage5010.4291661.00.5302740.0009990.0009990.000999
.................................
5395netmats_parcorridgeMars_Final4990.0906191.00.0316580.0699300.0009990.115884
5396netmats_parcorridgeMars_Final4990.0056791.00.0381460.5654350.0009990.147852
5397netmats_parcorridgeMars_Final4990.0172331.00.0574840.4495500.0009990.106893
5398netmats_parcorridgeMars_Final4990.0097011.00.0052760.5264740.0009990.400599
5399netmats_parcorridgeMars_Final499-0.0322891.00.0177250.7232770.0009990.254745
\n", "

5400 rows × 10 columns

\n", "
" ], "text/plain": [ " connectivity model target n r_discovery_cv \\\n", "0 netmats_parcor ridge age 501 0.480679 \n", "1 netmats_parcor ridge age 501 0.477272 \n", "2 netmats_parcor ridge age 501 0.528641 \n", "3 netmats_parcor ridge age 501 0.488856 \n", "4 netmats_parcor ridge age 501 0.429166 \n", "... ... ... ... ... ... \n", "5395 netmats_parcor ridge Mars_Final 499 0.090619 \n", "5396 netmats_parcor ridge Mars_Final 499 0.005679 \n", "5397 netmats_parcor ridge Mars_Final 499 0.017233 \n", "5398 netmats_parcor ridge Mars_Final 499 0.009701 \n", "5399 netmats_parcor ridge Mars_Final 499 -0.032289 \n", "\n", " r_discovery_overfit r_replication p_discovery_cv p_discovery_overfit \\\n", "0 1.0 0.514760 0.000999 0.000999 \n", "1 1.0 0.455021 0.000999 0.000999 \n", "2 1.0 0.482612 0.000999 0.000999 \n", "3 1.0 0.502899 0.000999 0.000999 \n", "4 1.0 0.530274 0.000999 0.000999 \n", "... ... ... ... ... \n", "5395 1.0 0.031658 0.069930 0.000999 \n", "5396 1.0 0.038146 0.565435 0.000999 \n", "5397 1.0 0.057484 0.449550 0.000999 \n", "5398 1.0 0.005276 0.526474 0.000999 \n", "5399 1.0 0.017725 0.723277 0.000999 \n", "\n", " p_replication \n", "0 0.000999 \n", "1 0.000999 \n", "2 0.000999 \n", "3 0.000999 \n", "4 0.000999 \n", "... ... \n", "5395 0.115884 \n", "5396 0.147852 \n", "5397 0.106893 \n", "5398 0.400599 \n", "5399 0.254745 \n", "\n", "[5400 rows x 10 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "random_state = 42\n", "n_bootstrap = 100\n", "\n", "features = {\n", " 'netmats_parcor': netmats_parcor#,\n", " #'netmats_pearson': netmats_pearson\n", "}\n", "\n", "models = {\n", " 'ridge': Ridge()\n", "}\n", "\n", "# We aggregate all results here:\n", "df = pd.DataFrame(columns=['connectivity','model','target','n','r_discovery_cv','r_discovery_overfit','r_replication','p_discovery_cv','p_discovery_overfit','p_replication'])\n", "\n", "for feature_set in features:\n", " for model in models:\n", " for target_var in targets:\n", " for sample_size in ['max']:\n", "\n", " print('*****************************************************************')\n", " print(feature_set, model, target_var, sample_size)\n", "\n", " X, y = create_data(target=target_var, feature_data=features[feature_set])\n", "\n", " if sample_size=='max':\n", " sample_size = int(len(y)/2)\n", "\n", " # create random seeds for each bootstrap iteration for reproducibility\n", " rng = np.random.default_rng(random_state)\n", " random_sates = rng.integers(np.iinfo(np.int32).max, size=n_bootstrap)\n", "\n", " # run bootstrap iterations in parallel\n", " r_discovery_cv, r_discovery_overfit, r_replication, p_discovery_cv, p_discovery_overfit, p_replication = zip(\n", " *Parallel(n_jobs=-1)(\n", " delayed(bootstrap_workhorse)(X, y, sample_size, models[model], seed) for seed in random_sates))\n", "\n", " tmp_data_frame = pd.DataFrame({\n", " 'connectivity' : feature_set,\n", " 'model' : model,\n", " 'target' : target_var,\n", " 'n' : sample_size,\n", " 'r_discovery_cv': r_discovery_cv,\n", " 'r_discovery_overfit': r_discovery_overfit,\n", " 'r_replication': r_replication,\n", " 'p_discovery_cv': p_discovery_cv,\n", " 'p_discovery_overfit': p_discovery_overfit,\n", " 'p_replication': p_replication\n", " })\n", " #sns.scatterplot(x='r_replication', y='r_discovery_cv', data=tmp_data_frame)\n", " #plt.ylabel('in-sample (r)')\n", " #plt.xlabel('out-of-sample (r_pred)')\n", " #plt.show()\n", " print('r discovery (with cv) :', tmp_data_frame.r_discovery_cv.mean(), 'r replication:', tmp_data_frame.r_replication.mean())\n", "\n", " for alpha in [0.05, 0.01, 0.005, 0.001]:\n", " print('Replicability at alpha =', alpha, ':',\n", " (tmp_data_frame.loc[tmp_data_frame['p_discovery_cv']\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
r_discovery_cvr_replication
connectivitymodeltargetn
netmats_parcorridgeAngAffect_Unadj5010.0967500.097385
AngAggr_Unadj5010.1940560.200524
AngHostil_Unadj5010.0633660.075052
CardSort_AgeAdj5000.1789660.189577
CogCrystalComp_AgeAdj4990.5093560.525248
CogEarlyComp_AgeAdj4970.3501070.346213
CogFluidComp_AgeAdj4960.3049800.299675
CogTotalComp_AgeAdj4950.4726520.479039
DDisc_AUC_2005000.2088450.207062
Dexterity_AgeAdj5010.1698380.167399
ER40ANG5000.0729090.079310
ER40FEAR5000.0337880.032170
ER40HAP500-0.059106-0.040972
ER40NOE5000.0501500.054771
ER40SAD5000.0335590.043959
EmotSupp_Unadj5010.1260290.122264
FearAffect_Unadj5010.1556320.151226
FearSomat_Unadj5010.0611010.054247
Flanker_AgeAdj5010.1322610.140226
Friendship_Unadj5010.0853130.084280
IWRD_TOT5000.0810180.073940
InstruSupp_Unadj5010.0419200.032621
LifeSatisf_Unadj5010.1987330.184406
ListSort_AgeAdj5010.2726220.276293
Loneliness_Unadj5010.1018120.103386
MMSE_Score5010.1123520.129704
Mars_Final4990.0815300.022663
MeanPurp_Unadj5010.0998470.089060
NEOFAC_A5000.1922640.210582
NEOFAC_C5000.1797370.193548
NEOFAC_E5000.1579200.164470
NEOFAC_N5000.0731430.066961
NEOFAC_O5000.2650510.285786
Noise_Comp4980.1375440.132779
Odor_AgeAdj5000.0342230.019033
PMAT24_A_CR4990.2922430.301492
PSQI_Score5010.1429130.145200
PainInterf_Tscore5010.0327420.033291
PercHostil_Unadj5010.0376220.045006
PercReject_Unadj5010.0368870.035436
PercStress_Unadj5010.1002110.092691
PicSeq_AgeAdj5010.1823310.185931
PicVocab_AgeAdj5010.4748020.478442
PosAffect_Unadj5010.0988380.087716
ProcSpeed_AgeAdj5010.1176410.122499
ReadEng_AgeAdj5010.4508670.460222
SCPT_SEN5000.0049530.005955
SCPT_SPEC5000.1483580.155431
Sadness_Unadj5010.1327820.124456
SelfEff_Unadj5010.0999040.107742
Taste_AgeAdj4990.0944190.080639
VSPLOT_TC5000.3304190.336147
age5010.4802080.489671
\n", "" ], "text/plain": [ " r_discovery_cv r_replication\n", "connectivity model target n \n", "netmats_parcor ridge AngAffect_Unadj 501 0.096750 0.097385\n", " AngAggr_Unadj 501 0.194056 0.200524\n", " AngHostil_Unadj 501 0.063366 0.075052\n", " CardSort_AgeAdj 500 0.178966 0.189577\n", " CogCrystalComp_AgeAdj 499 0.509356 0.525248\n", " CogEarlyComp_AgeAdj 497 0.350107 0.346213\n", " CogFluidComp_AgeAdj 496 0.304980 0.299675\n", " CogTotalComp_AgeAdj 495 0.472652 0.479039\n", " DDisc_AUC_200 500 0.208845 0.207062\n", " Dexterity_AgeAdj 501 0.169838 0.167399\n", " ER40ANG 500 0.072909 0.079310\n", " ER40FEAR 500 0.033788 0.032170\n", " ER40HAP 500 -0.059106 -0.040972\n", " ER40NOE 500 0.050150 0.054771\n", " ER40SAD 500 0.033559 0.043959\n", " EmotSupp_Unadj 501 0.126029 0.122264\n", " FearAffect_Unadj 501 0.155632 0.151226\n", " FearSomat_Unadj 501 0.061101 0.054247\n", " Flanker_AgeAdj 501 0.132261 0.140226\n", " Friendship_Unadj 501 0.085313 0.084280\n", " IWRD_TOT 500 0.081018 0.073940\n", " InstruSupp_Unadj 501 0.041920 0.032621\n", " LifeSatisf_Unadj 501 0.198733 0.184406\n", " ListSort_AgeAdj 501 0.272622 0.276293\n", " Loneliness_Unadj 501 0.101812 0.103386\n", " MMSE_Score 501 0.112352 0.129704\n", " Mars_Final 499 0.081530 0.022663\n", " MeanPurp_Unadj 501 0.099847 0.089060\n", " NEOFAC_A 500 0.192264 0.210582\n", " NEOFAC_C 500 0.179737 0.193548\n", " NEOFAC_E 500 0.157920 0.164470\n", " NEOFAC_N 500 0.073143 0.066961\n", " NEOFAC_O 500 0.265051 0.285786\n", " Noise_Comp 498 0.137544 0.132779\n", " Odor_AgeAdj 500 0.034223 0.019033\n", " PMAT24_A_CR 499 0.292243 0.301492\n", " PSQI_Score 501 0.142913 0.145200\n", " PainInterf_Tscore 501 0.032742 0.033291\n", " PercHostil_Unadj 501 0.037622 0.045006\n", " PercReject_Unadj 501 0.036887 0.035436\n", " PercStress_Unadj 501 0.100211 0.092691\n", " PicSeq_AgeAdj 501 0.182331 0.185931\n", " PicVocab_AgeAdj 501 0.474802 0.478442\n", " PosAffect_Unadj 501 0.098838 0.087716\n", " ProcSpeed_AgeAdj 501 0.117641 0.122499\n", " ReadEng_AgeAdj 501 0.450867 0.460222\n", " SCPT_SEN 500 0.004953 0.005955\n", " SCPT_SPEC 500 0.148358 0.155431\n", " Sadness_Unadj 501 0.132782 0.124456\n", " SelfEff_Unadj 501 0.099904 0.107742\n", " Taste_AgeAdj 499 0.094419 0.080639\n", " VSPLOT_TC 500 0.330419 0.336147\n", " age 501 0.480208 0.489671" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby(['connectivity', 'model', 'target', 'n']).mean()[['r_discovery_cv', 'r_replication']]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_19024/3478190779.py:9: RuntimeWarning: invalid value encountered in long_scalars\n", " return (x.loc[x['p_discovery_cv']", "image/png": "\n" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df=pd.read_csv('res/revised_results_Ridge.csv')\n", "sns.set(rc={\"figure.figsize\":(14, 2)})\n", "sns.set_style(\"white\")\n", "\n", "phenotypes = pd.read_csv('hcp_data/phenotypes-of-interest.csv').iloc[:, 1:]\n", "phenotypes = phenotypes.drop([2,3]) # Age duplicates\n", "\n", "def prep(x, alpha=0.05):\n", " return (x.loc[x['p_discovery_cv']", "image/png": "\n" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ax=sns.barplot(data=df_plot, x='phenotype', y='r_discovery', palette=colors)\n", "plt.xticks(rotation=90)\n", "ax.axhline(0.074, color='black', linestyle=':') # parametric one-sided significance threshold\n", "ax.axhline(0, color='black')\n", "plt.savefig('fig/r_discovery_all_phenotypes.pdf')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "Value of 'hover_data_1' is not the name of a column in 'data_frame'. Expected one of [0, 'r_discovery', 'category', 'phenotype'] but received: phenotype_long", "output_type": "error", "traceback": [ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)", "Cell \u001B[0;32mIn[5], line 3\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mplotly\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mexpress\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m \u001B[38;5;21;01mpx\u001B[39;00m\n\u001B[0;32m----> 3\u001B[0m fig \u001B[38;5;241m=\u001B[39m \u001B[43mpx\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbar\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdf_plot\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43my\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;241;43m0\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mx\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mphenotype\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mtext_auto\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43m.2s\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4\u001B[0m \u001B[43m \u001B[49m\u001B[43mtitle\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mReplication probability with n=500\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m 5\u001B[0m \u001B[43m \u001B[49m\u001B[43mlabels\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m{\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43m0\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m:\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43m$P_\u001B[39;49m\u001B[38;5;132;43;01m{rep}\u001B[39;49;00m\u001B[38;5;124;43m$\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m}\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 6\u001B[0m \u001B[43m \u001B[49m\u001B[43mhover_data\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mcategory\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mphenotype_long\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mr_discovery\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcolor\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mcategory\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[1;32m 7\u001B[0m fig\u001B[38;5;241m.\u001B[39mupdate_traces(textfont_size\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m12\u001B[39m, textangle\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m0\u001B[39m, textposition\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124moutside\u001B[39m\u001B[38;5;124m\"\u001B[39m, cliponaxis\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\n\u001B[1;32m 8\u001B[0m fig\u001B[38;5;241m.\u001B[39mshow()\n", "File \u001B[0;32m~/src/BWAS_comment/venv/lib/python3.10/site-packages/plotly/express/_chart_types.py:373\u001B[0m, in \u001B[0;36mbar\u001B[0;34m(data_frame, x, y, color, pattern_shape, facet_row, facet_col, facet_col_wrap, facet_row_spacing, facet_col_spacing, hover_name, hover_data, custom_data, text, base, error_x, error_x_minus, error_y, error_y_minus, animation_frame, animation_group, category_orders, labels, color_discrete_sequence, color_discrete_map, color_continuous_scale, pattern_shape_sequence, pattern_shape_map, range_color, color_continuous_midpoint, opacity, orientation, barmode, log_x, log_y, range_x, range_y, text_auto, title, template, width, height)\u001B[0m\n\u001B[1;32m 325\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mbar\u001B[39m(\n\u001B[1;32m 326\u001B[0m data_frame\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 327\u001B[0m x\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 367\u001B[0m height\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 368\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m go\u001B[38;5;241m.\u001B[39mFigure:\n\u001B[1;32m 369\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 370\u001B[0m \u001B[38;5;124;03m In a bar plot, each row of `data_frame` is represented as a rectangular\u001B[39;00m\n\u001B[1;32m 371\u001B[0m \u001B[38;5;124;03m mark.\u001B[39;00m\n\u001B[1;32m 372\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[0;32m--> 373\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mmake_figure\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 374\u001B[0m \u001B[43m \u001B[49m\u001B[43margs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mlocals\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 375\u001B[0m \u001B[43m \u001B[49m\u001B[43mconstructor\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mgo\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mBar\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 376\u001B[0m \u001B[43m \u001B[49m\u001B[43mtrace_patch\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mdict\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mtextposition\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mauto\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 377\u001B[0m \u001B[43m \u001B[49m\u001B[43mlayout_patch\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mdict\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mbarmode\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mbarmode\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 378\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n", "File \u001B[0;32m~/src/BWAS_comment/venv/lib/python3.10/site-packages/plotly/express/_core.py:1990\u001B[0m, in \u001B[0;36mmake_figure\u001B[0;34m(args, constructor, trace_patch, layout_patch)\u001B[0m\n\u001B[1;32m 1987\u001B[0m layout_patch \u001B[38;5;241m=\u001B[39m layout_patch \u001B[38;5;129;01mor\u001B[39;00m {}\n\u001B[1;32m 1988\u001B[0m apply_default_cascade(args)\n\u001B[0;32m-> 1990\u001B[0m args \u001B[38;5;241m=\u001B[39m \u001B[43mbuild_dataframe\u001B[49m\u001B[43m(\u001B[49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mconstructor\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1991\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m constructor \u001B[38;5;129;01min\u001B[39;00m [go\u001B[38;5;241m.\u001B[39mTreemap, go\u001B[38;5;241m.\u001B[39mSunburst, go\u001B[38;5;241m.\u001B[39mIcicle] \u001B[38;5;129;01mand\u001B[39;00m args[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mpath\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m 1992\u001B[0m args \u001B[38;5;241m=\u001B[39m process_dataframe_hierarchy(args)\n", "File \u001B[0;32m~/src/BWAS_comment/venv/lib/python3.10/site-packages/plotly/express/_core.py:1405\u001B[0m, in \u001B[0;36mbuild_dataframe\u001B[0;34m(args, constructor)\u001B[0m\n\u001B[1;32m 1402\u001B[0m args[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcolor\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[1;32m 1403\u001B[0m \u001B[38;5;66;03m# now that things have been prepped, we do the systematic rewriting of `args`\u001B[39;00m\n\u001B[0;32m-> 1405\u001B[0m df_output, wide_id_vars \u001B[38;5;241m=\u001B[39m \u001B[43mprocess_args_into_dataframe\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 1406\u001B[0m \u001B[43m \u001B[49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mwide_mode\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mvar_name\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mvalue_name\u001B[49m\n\u001B[1;32m 1407\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1409\u001B[0m \u001B[38;5;66;03m# now that `df_output` exists and `args` contains only references, we complete\u001B[39;00m\n\u001B[1;32m 1410\u001B[0m \u001B[38;5;66;03m# the special-case and wide-mode handling by further rewriting args and/or mutating\u001B[39;00m\n\u001B[1;32m 1411\u001B[0m \u001B[38;5;66;03m# df_output\u001B[39;00m\n\u001B[1;32m 1413\u001B[0m count_name \u001B[38;5;241m=\u001B[39m _escape_col_name(df_output, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcount\u001B[39m\u001B[38;5;124m\"\u001B[39m, [var_name, value_name])\n", "File \u001B[0;32m~/src/BWAS_comment/venv/lib/python3.10/site-packages/plotly/express/_core.py:1207\u001B[0m, in \u001B[0;36mprocess_args_into_dataframe\u001B[0;34m(args, wide_mode, var_name, value_name)\u001B[0m\n\u001B[1;32m 1205\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m argument \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mindex\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[1;32m 1206\u001B[0m err_msg \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m To use the index, pass it in directly as `df.index`.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m-> 1207\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(err_msg)\n\u001B[1;32m 1208\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m length \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(df_input[argument]) \u001B[38;5;241m!=\u001B[39m length:\n\u001B[1;32m 1209\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[1;32m 1210\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAll arguments should have the same length. \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 1211\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mThe length of column argument `df[\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m]` is \u001B[39m\u001B[38;5;132;01m%d\u001B[39;00m\u001B[38;5;124m, whereas the \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 1218\u001B[0m )\n\u001B[1;32m 1219\u001B[0m )\n", "\u001B[0;31mValueError\u001B[0m: Value of 'hover_data_1' is not the name of a column in 'data_frame'. Expected one of [0, 'r_discovery', 'category', 'phenotype'] but received: phenotype_long" ] } ], "source": [ "import plotly.express as px\n", "\n", "fig = px.bar(df_plot, y=0, x='phenotype', text_auto='.2s',\n", " title=\"Replication probability with n=500\",\n", " labels={'0':'$P_{rep}$'},\n", " hover_data=['category', 'phenotype_long', 'r_discovery'], color='category')\n", "fig.update_traces(textfont_size=12, textangle=0, textposition=\"outside\", cliponaxis=False)\n", "fig.show()\n", "\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_19024/1079767499.py:9: FutureWarning:\n", "\n", "The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object.\n", "\n" ] } ], "source": [ "import plotly\n", "import plotly.graph_objs as go\n", "\n", "colors = [\"#de324c\",\"#f4895f\",\"#f8e16f\",\"#95cf92\",\"#9656a2\",\"#369acc\"]\n", "\n", "cols = [ colors[c] for c in pd.Categorical(df_plot['category']).codes]\n", "\n", "df_plot['category'] = pd.Categorical(df_plot['category'])\n", "df_plot['category'].cat.reorder_categories(['Emotion', 'Sensory', 'Motor', 'Alertness', 'Personality', 'Cognition'], inplace= True)\n", "df_plot = df_plot.sort_values(by=['category', 'r_discovery'], ascending=False)\n", "\n", "trace1l = go.Scatter( name='',\n", " x=df_plot['phenotype'].values,\n", " y=[80] * len(df_plot['phenotype'].values),\n", " mode='lines',\n", " line = dict(color='gray', width=1, dash='dash'),\n", " showlegend=False,\n", " hovertemplate=\"Replication threshold\"\n", " )\n", "\n", "trace1 = go.Bar(x=df_plot['phenotype'].values,y=df_plot[0].values, name='',\n", " marker_color=cols, showlegend=False,\n", " customdata=df_plot.values,\n", " #texttemplate=\"%{y:0.0f}%\",\n", " textposition=\"outside\",\n", " textangle=0,\n", " textfont_color=\"gray\",\n", " hovertemplate=\"
\".join([\n", " \"Domain: %{customdata[2]}\",\n", " \"Effect Size: r=%{customdata[1]:.2f}\",\n", " \"Replication Probability: %{customdata[0]:.0f}%\",\n", " \"Description: %{customdata[4]}\"\n", " ])\n", " #hovertext=pd.Categorical(df_plot['category'])\n", " )\n", "\n", "trace2l = go.Scatter( name='',\n", " x=df_plot['phenotype'].values,\n", " y=[0.074] * len(df_plot['phenotype'].values),\n", " mode='lines',\n", " line = dict(color='gray', width=1, dash='dash'),\n", " showlegend=False,\n", " hovertemplate=\"Significance threshold\",\n", " visible=False\n", " )\n", "\n", "\n", "trace2 = go.Bar(x=df_plot['phenotype'].values,y=df_plot['r_discovery'], visible=False, name='',\n", " marker_color=cols, showlegend=False,\n", " customdata=df_plot.values,\n", " #texttemplate=\"%{y:0.2f}\",\n", " textposition=\"outside\",\n", " textangle=0,\n", " textfont_color=\"gray\",\n", " hovertemplate=\"
\".join([\n", " \"Domain: %{customdata[2]}\",\n", " \"Effect Size: r=%{customdata[1]:.2f}\",\n", " \"Replication Probability: %{customdata[0]:.0f}%\",\n", " \"Description: %{customdata[4]}\"\n", " ])\n", " )\n", "data = [trace1l, trace1, trace2l, trace2]\n", "\n", "updatemenus = list([\n", " dict(active=0,\n", " showactive = True,\n", " buttons=list([\n", " dict(label = \"Replication Probability\",\n", " method = \"update\",\n", " args = [{\"visible\": [True, True, False, False]}]), # hide trace2\n", " dict(label = \"Predictive Effect Size\",\n", " method = \"update\",\n", " args = [{\"visible\": [False, False, True, True]}]) # hide trace1\n", " ]),\n", " x = 0,\n", " xanchor = 'left',\n", " y = 1.5,\n", " yanchor = 'top',\n", " )])\n", "\n", "layout = dict(title=\"\",\n", " showlegend=True,\n", " xaxis=dict(title=\"Phenotype\",\n", " showgrid=False, # thin lines in the background\n", " #showticklabels=False, # numbers below),\n", " zeroline=True # thick line at x=0\n", " ),\n", " updatemenus=updatemenus,\n", " template='simple_white',\n", " font=dict(\n", " size=10,\n", " color=\"gray\"),\n", " autosize=True,\n", " height=260,\n", " margin=dict(\n", " l=5,\n", " r=5,\n", " b=10,\n", " t=10,\n", " pad=4\n", " ),\n", " )\n", "\n", "fig=dict(data=data, layout=layout)\n", "\n", "#plotly.offline.plot(fig)\n", "#go.Figure(fig).show()\n", "import plotly.io as pio\n", "pio.write_html(fig, file='docs/_includes/figure.html', auto_open=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": " 0 r_discovery category \\\ntarget \nCogCrystalComp_AgeAdj 100.000000 0.509356 Cognition \nPicVocab_AgeAdj 100.000000 0.474802 Cognition \nCogTotalComp_AgeAdj 100.000000 0.472652 Cognition \nReadEng_AgeAdj 100.000000 0.450867 Cognition \nCogEarlyComp_AgeAdj 100.000000 0.350107 Cognition \nVSPLOT_TC 100.000000 0.330419 Cognition \nCogFluidComp_AgeAdj 100.000000 0.304980 Cognition \nPMAT24_A_CR 100.000000 0.292243 Cognition \nListSort_AgeAdj 100.000000 0.272622 Cognition \nDDisc_AUC_200 100.000000 0.208845 Cognition \nPicSeq_AgeAdj 100.000000 0.182331 Cognition \nCardSort_AgeAdj 100.000000 0.178966 Cognition \nSCPT_SPEC 100.000000 0.148358 Cognition \nFlanker_AgeAdj 98.876404 0.132261 Cognition \nProcSpeed_AgeAdj 84.615385 0.117641 Cognition \nIWRD_TOT 36.538462 0.081018 Cognition \nSCPT_SEN 0.000000 0.004953 Cognition \nNEOFAC_O 100.000000 0.265051 Personality \nNEOFAC_A 100.000000 0.192264 Personality \nNEOFAC_C 100.000000 0.179737 Personality \nNEOFAC_E 98.936170 0.157920 Personality \nNEOFAC_N 40.000000 0.073143 Personality \nPSQI_Score 98.850575 0.142913 Alertness \nMMSE_Score 89.873418 0.112352 Alertness \nDexterity_AgeAdj 98.924731 0.169838 Motor \nNoise_Comp 97.674419 0.137544 Sensory \nTaste_AgeAdj 43.103448 0.094419 Sensory \nMars_Final 6.666667 0.081530 Sensory \nOdor_AgeAdj 0.000000 0.034223 Sensory \nPainInterf_Tscore 0.000000 0.032742 Sensory \nLifeSatisf_Unadj 100.000000 0.198733 Emotion \nAngAggr_Unadj 100.000000 0.194056 Emotion \nFearAffect_Unadj 100.000000 0.155632 Emotion \nSadness_Unadj 94.047619 0.132782 Emotion \nEmotSupp_Unadj 96.551724 0.126029 Emotion \nLoneliness_Unadj 71.666667 0.101812 Emotion \nPercStress_Unadj 62.500000 0.100211 Emotion \nSelfEff_Unadj 81.818182 0.099904 Emotion \nMeanPurp_Unadj 50.769231 0.099847 Emotion \nPosAffect_Unadj 55.223881 0.098838 Emotion \nAngAffect_Unadj 69.230769 0.096750 Emotion \nFriendship_Unadj 44.000000 0.085313 Emotion \nER40ANG 44.680851 0.072909 Emotion \nAngHostil_Unadj 41.025641 0.063366 Emotion \nFearSomat_Unadj 14.285714 0.061101 Emotion \nER40NOE 16.666667 0.050150 Emotion \nInstruSupp_Unadj 4.166667 0.041920 Emotion \nPercHostil_Unadj 0.000000 0.037622 Emotion \nPercReject_Unadj 4.347826 0.036887 Emotion \nER40FEAR 0.000000 0.033788 Emotion \nER40SAD 10.000000 0.033559 Emotion \nER40HAP NaN -0.059106 Emotion \n\n phenotype \\\ntarget \nCogCrystalComp_AgeAdj CogCrystalComp_AgeAdj \nPicVocab_AgeAdj PicVocab_AgeAdj \nCogTotalComp_AgeAdj CogTotalComp_AgeAdj \nReadEng_AgeAdj ReadEng_AgeAdj \nCogEarlyComp_AgeAdj CogEarlyComp_AgeAdj \nVSPLOT_TC VSPLOT_TC \nCogFluidComp_AgeAdj CogFluidComp_AgeAdj \nPMAT24_A_CR PMAT24_A_CR \nListSort_AgeAdj ListSort_AgeAdj \nDDisc_AUC_200 DDisc_AUC_200 \nPicSeq_AgeAdj PicSeq_AgeAdj \nCardSort_AgeAdj CardSort_AgeAdj \nSCPT_SPEC SCPT_SPEC \nFlanker_AgeAdj Flanker_AgeAdj \nProcSpeed_AgeAdj ProcSpeed_AgeAdj \nIWRD_TOT IWRD_TOT \nSCPT_SEN SCPT_SEN \nNEOFAC_O NEOFAC_O \nNEOFAC_A NEOFAC_A \nNEOFAC_C NEOFAC_C \nNEOFAC_E NEOFAC_E \nNEOFAC_N NEOFAC_N \nPSQI_Score PSQI_Score \nMMSE_Score MMSE_Score \nDexterity_AgeAdj Dexterity_AgeAdj \nNoise_Comp Noise_Comp \nTaste_AgeAdj Taste_AgeAdj \nMars_Final Mars_Final \nOdor_AgeAdj Odor_AgeAdj \nPainInterf_Tscore PainInterf_Tscore \nLifeSatisf_Unadj LifeSatisf_Unadj \nAngAggr_Unadj AngAggr_Unadj \nFearAffect_Unadj FearAffect_Unadj \nSadness_Unadj Sadness_Unadj \nEmotSupp_Unadj EmotSupp_Unadj \nLoneliness_Unadj Loneliness_Unadj \nPercStress_Unadj PercStress_Unadj \nSelfEff_Unadj SelfEff_Unadj \nMeanPurp_Unadj MeanPurp_Unadj \nPosAffect_Unadj PosAffect_Unadj \nAngAffect_Unadj AngAffect_Unadj \nFriendship_Unadj Friendship_Unadj \nER40ANG ER40ANG \nAngHostil_Unadj AngHostil_Unadj \nFearSomat_Unadj FearSomat_Unadj \nER40NOE ER40NOE \nInstruSupp_Unadj InstruSupp_Unadj \nPercHostil_Unadj PercHostil_Unadj \nPercReject_Unadj PercReject_Unadj \nER40FEAR ER40FEAR \nER40SAD ER40SAD \nER40HAP ER40HAP \n\n phenotype_long \ntarget \nCogCrystalComp_AgeAdj NIH Toolbox Cognition Crystallized Composite: ... \nPicVocab_AgeAdj NIH Toolbox Picture Vocabulary Test: Age-Adjus... \nCogTotalComp_AgeAdj NIH Toolbox Cognition Total Composite Score: A... \nReadEng_AgeAdj NIH Toolbox Oral Reading Recognition Test: Age... \nCogEarlyComp_AgeAdj NIH Toolbox Cognition Early Childhood Composit... \nVSPLOT_TC Variable Short Penn Line Orientation: Total Nu... \nCogFluidComp_AgeAdj NIH Toolbox Cognition Fluid Composite: Age Adj... \nPMAT24_A_CR Penn Progressive Matrices: Number of Correct R... \nListSort_AgeAdj NIH Toolbox List Sorting Working Memory Test: ... \nDDisc_AUC_200 Delay Discounting: Area Under the Curve for Di... \nPicSeq_AgeAdj NIH Toolbox Picture Sequence Memory Test: Age-... \nCardSort_AgeAdj NIH Toolbox Dimensional Change Card Sort Test:... \nSCPT_SPEC Short Penn Continuous Performance Test: Specif... \nFlanker_AgeAdj NIH Toolbox Flanker Inhibitory Control and Att... \nProcSpeed_AgeAdj NIH Toolbox Pattern Comparison Processing Spee... \nIWRD_TOT Penn Word Memory Test: Total Number of Correct... \nSCPT_SEN Short Penn Continuous Performance Test: Sensit... \nNEOFAC_O NEO-FFI Openness to Experience (NEOFAC_O) \nNEOFAC_A NEO-FFI Agreeableness (NEOFAC_A) \nNEOFAC_C NEO-FFI Conscientiousness (NEOFAC_C) \nNEOFAC_E NEO-FFI Extraversion (NEOFAC_E) \nNEOFAC_N NEO-FFI Neuroticism (NEOFAC_N) \nPSQI_Score Sleep (Pittsburgh Sleep Questionnaire) Total S... \nMMSE_Score Mini Mental Status Exam Total Score \nDexterity_AgeAdj NIH Toolbox 9-hole Pegboard Dexterity Test : A... \nNoise_Comp NIH Toolbox Words-In-Noise Age 6+: Computed Score \nTaste_AgeAdj NIH Toolbox Regional Taste Intensity Age 12+ A... \nMars_Final Mars Final Contrast Sensitivity Score \nOdor_AgeAdj NIH Toolbox Odor Identification Age 3+ Age-Adj... \nPainInterf_Tscore NIH Toolbox Pain Interference Survey Age 18+: ... \nLifeSatisf_Unadj NIH Toolbox General Life Satisfaction Survey: ... \nAngAggr_Unadj NIH Toolbox Anger-Physical Aggression Survey: ... \nFearAffect_Unadj NIH Toolbox Fear-Affect Survey: Unadjusted Sca... \nSadness_Unadj NIH Toolbox Sadness Survey: Unadjusted Scale S... \nEmotSupp_Unadj NIH Toolbox Emotional Support Survey: Unadjust... \nLoneliness_Unadj NIH Toolbox Loneliness Survey: Unadjusted Scal... \nPercStress_Unadj NIH Toolbox Perceived Stress Survey: Unadjuste... \nSelfEff_Unadj NIH Toolbox Self-Efficacy Survey: Unadjusted S... \nMeanPurp_Unadj NIH Toolbox Meaning and Purpose Survey: Unadju... \nPosAffect_Unadj NIH Toolbox Positive Affect Survey: Unadjusted... \nAngAffect_Unadj NIH Toolbox Anger-Affect Survey: Unadjusted Sc... \nFriendship_Unadj NIH Toolbox Friendship Survey: Unadjusted Scal... \nER40ANG Penn Emotion Recognition Test: Number of Corre... \nAngHostil_Unadj NIH Toolbox Anger-Hostility Survey: Unadjusted... \nFearSomat_Unadj NIH Toolbox Fear-Somatic Arousal Survey: Unadj... \nER40NOE Penn Emotion Recognition Test: Number of Corre... \nInstruSupp_Unadj NIH Toolbox Instrumental Support Survey: Unadj... \nPercHostil_Unadj NIH Toolbox Perceived Hostility Survey: Unadju... \nPercReject_Unadj NIH Toolbox Perceived Rejection Survey: Unadju... \nER40FEAR Penn Emotion Recognition Test: Number of Corre... \nER40SAD Penn Emotion Recognition Test: Number of Corre... \nER40HAP Penn Emotion Recognition Test: Number of Corre... ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0r_discoverycategoryphenotypephenotype_long
target
CogCrystalComp_AgeAdj100.0000000.509356CognitionCogCrystalComp_AgeAdjNIH Toolbox Cognition Crystallized Composite: ...
PicVocab_AgeAdj100.0000000.474802CognitionPicVocab_AgeAdjNIH Toolbox Picture Vocabulary Test: Age-Adjus...
CogTotalComp_AgeAdj100.0000000.472652CognitionCogTotalComp_AgeAdjNIH Toolbox Cognition Total Composite Score: A...
ReadEng_AgeAdj100.0000000.450867CognitionReadEng_AgeAdjNIH Toolbox Oral Reading Recognition Test: Age...
CogEarlyComp_AgeAdj100.0000000.350107CognitionCogEarlyComp_AgeAdjNIH Toolbox Cognition Early Childhood Composit...
VSPLOT_TC100.0000000.330419CognitionVSPLOT_TCVariable Short Penn Line Orientation: Total Nu...
CogFluidComp_AgeAdj100.0000000.304980CognitionCogFluidComp_AgeAdjNIH Toolbox Cognition Fluid Composite: Age Adj...
PMAT24_A_CR100.0000000.292243CognitionPMAT24_A_CRPenn Progressive Matrices: Number of Correct R...
ListSort_AgeAdj100.0000000.272622CognitionListSort_AgeAdjNIH Toolbox List Sorting Working Memory Test: ...
DDisc_AUC_200100.0000000.208845CognitionDDisc_AUC_200Delay Discounting: Area Under the Curve for Di...
PicSeq_AgeAdj100.0000000.182331CognitionPicSeq_AgeAdjNIH Toolbox Picture Sequence Memory Test: Age-...
CardSort_AgeAdj100.0000000.178966CognitionCardSort_AgeAdjNIH Toolbox Dimensional Change Card Sort Test:...
SCPT_SPEC100.0000000.148358CognitionSCPT_SPECShort Penn Continuous Performance Test: Specif...
Flanker_AgeAdj98.8764040.132261CognitionFlanker_AgeAdjNIH Toolbox Flanker Inhibitory Control and Att...
ProcSpeed_AgeAdj84.6153850.117641CognitionProcSpeed_AgeAdjNIH Toolbox Pattern Comparison Processing Spee...
IWRD_TOT36.5384620.081018CognitionIWRD_TOTPenn Word Memory Test: Total Number of Correct...
SCPT_SEN0.0000000.004953CognitionSCPT_SENShort Penn Continuous Performance Test: Sensit...
NEOFAC_O100.0000000.265051PersonalityNEOFAC_ONEO-FFI Openness to Experience (NEOFAC_O)
NEOFAC_A100.0000000.192264PersonalityNEOFAC_ANEO-FFI Agreeableness (NEOFAC_A)
NEOFAC_C100.0000000.179737PersonalityNEOFAC_CNEO-FFI Conscientiousness (NEOFAC_C)
NEOFAC_E98.9361700.157920PersonalityNEOFAC_ENEO-FFI Extraversion (NEOFAC_E)
NEOFAC_N40.0000000.073143PersonalityNEOFAC_NNEO-FFI Neuroticism (NEOFAC_N)
PSQI_Score98.8505750.142913AlertnessPSQI_ScoreSleep (Pittsburgh Sleep Questionnaire) Total S...
MMSE_Score89.8734180.112352AlertnessMMSE_ScoreMini Mental Status Exam Total Score
Dexterity_AgeAdj98.9247310.169838MotorDexterity_AgeAdjNIH Toolbox 9-hole Pegboard Dexterity Test : A...
Noise_Comp97.6744190.137544SensoryNoise_CompNIH Toolbox Words-In-Noise Age 6+: Computed Score
Taste_AgeAdj43.1034480.094419SensoryTaste_AgeAdjNIH Toolbox Regional Taste Intensity Age 12+ A...
Mars_Final6.6666670.081530SensoryMars_FinalMars Final Contrast Sensitivity Score
Odor_AgeAdj0.0000000.034223SensoryOdor_AgeAdjNIH Toolbox Odor Identification Age 3+ Age-Adj...
PainInterf_Tscore0.0000000.032742SensoryPainInterf_TscoreNIH Toolbox Pain Interference Survey Age 18+: ...
LifeSatisf_Unadj100.0000000.198733EmotionLifeSatisf_UnadjNIH Toolbox General Life Satisfaction Survey: ...
AngAggr_Unadj100.0000000.194056EmotionAngAggr_UnadjNIH Toolbox Anger-Physical Aggression Survey: ...
FearAffect_Unadj100.0000000.155632EmotionFearAffect_UnadjNIH Toolbox Fear-Affect Survey: Unadjusted Sca...
Sadness_Unadj94.0476190.132782EmotionSadness_UnadjNIH Toolbox Sadness Survey: Unadjusted Scale S...
EmotSupp_Unadj96.5517240.126029EmotionEmotSupp_UnadjNIH Toolbox Emotional Support Survey: Unadjust...
Loneliness_Unadj71.6666670.101812EmotionLoneliness_UnadjNIH Toolbox Loneliness Survey: Unadjusted Scal...
PercStress_Unadj62.5000000.100211EmotionPercStress_UnadjNIH Toolbox Perceived Stress Survey: Unadjuste...
SelfEff_Unadj81.8181820.099904EmotionSelfEff_UnadjNIH Toolbox Self-Efficacy Survey: Unadjusted S...
MeanPurp_Unadj50.7692310.099847EmotionMeanPurp_UnadjNIH Toolbox Meaning and Purpose Survey: Unadju...
PosAffect_Unadj55.2238810.098838EmotionPosAffect_UnadjNIH Toolbox Positive Affect Survey: Unadjusted...
AngAffect_Unadj69.2307690.096750EmotionAngAffect_UnadjNIH Toolbox Anger-Affect Survey: Unadjusted Sc...
Friendship_Unadj44.0000000.085313EmotionFriendship_UnadjNIH Toolbox Friendship Survey: Unadjusted Scal...
ER40ANG44.6808510.072909EmotionER40ANGPenn Emotion Recognition Test: Number of Corre...
AngHostil_Unadj41.0256410.063366EmotionAngHostil_UnadjNIH Toolbox Anger-Hostility Survey: Unadjusted...
FearSomat_Unadj14.2857140.061101EmotionFearSomat_UnadjNIH Toolbox Fear-Somatic Arousal Survey: Unadj...
ER40NOE16.6666670.050150EmotionER40NOEPenn Emotion Recognition Test: Number of Corre...
InstruSupp_Unadj4.1666670.041920EmotionInstruSupp_UnadjNIH Toolbox Instrumental Support Survey: Unadj...
PercHostil_Unadj0.0000000.037622EmotionPercHostil_UnadjNIH Toolbox Perceived Hostility Survey: Unadju...
PercReject_Unadj4.3478260.036887EmotionPercReject_UnadjNIH Toolbox Perceived Rejection Survey: Unadju...
ER40FEAR0.0000000.033788EmotionER40FEARPenn Emotion Recognition Test: Number of Corre...
ER40SAD10.0000000.033559EmotionER40SADPenn Emotion Recognition Test: Number of Corre...
ER40HAPNaN-0.059106EmotionER40HAPPenn Emotion Recognition Test: Number of Corre...
\n
" }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "for i in df_plot.index:\n", " df_plot.loc[i, 'phenotype_long'] = phenotypes.loc[phenotypes.columnHeader==i, 'fullDisplayName'].values[0]\n", "df_plot" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'behavior' is not defined", "output_type": "error", "traceback": [ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)", "Cell \u001B[0;32mIn[9], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m tmp \u001B[38;5;241m=\u001B[39m \u001B[43mbehavior\u001B[49m[[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mCogTotalComp_AgeAdj\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mCogFluidComp_AgeAdj\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mPMAT24_A_CR\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mFlanker_AgeAdj\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mCardSort_AgeAdj\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mPicSeq_AgeAdj\u001B[39m\u001B[38;5;124m'\u001B[39m]]\n\u001B[1;32m 2\u001B[0m tmp\u001B[38;5;241m.\u001B[39mcorr()\n", "\u001B[0;31mNameError\u001B[0m: name 'behavior' is not defined" ] } ], "source": [ "tmp = behavior[['CogTotalComp_AgeAdj', 'CogFluidComp_AgeAdj', 'PMAT24_A_CR', 'Flanker_AgeAdj', 'CardSort_AgeAdj', 'PicSeq_AgeAdj']]\n", "tmp.corr()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": " \n " }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.plotly.v1+json": { "data": [ { "labels": [ "Replicable", "Not Replicable" ], "marker": { "colors": [ "green", "red" ] }, "name": "Trait-like", "rotation": -90, "values": [ 23, 7 ], "type": "pie", "domain": { "x": [ 0.0, 0.45 ], "y": [ 0.0, 1.0 ] }, "hole": 0.5, "hoverinfo": "label+percent+name" }, { "labels": [ "Replicable", "Not Replicable" ], "name": "State-like", "rotation": 90, "values": [ 6, 17 ], "type": "pie", "domain": { "x": [ 0.55, 1.0 ], "y": [ 0.0, 1.0 ] }, "hole": 0.5, "hoverinfo": "label+percent+name" } ], "layout": { "template": { "data": { "barpolar": [ { "marker": { "line": { "color": "white", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "bar": [ { "error_x": { "color": "rgb(36,36,36)" }, "error_y": { "color": "rgb(36,36,36)" }, "marker": { "line": { "color": "white", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "carpet": [ { "aaxis": { "endlinecolor": "rgb(36,36,36)", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "rgb(36,36,36)" }, "baxis": { "endlinecolor": "rgb(36,36,36)", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "rgb(36,36,36)" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" }, "type": "choropleth" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" }, "type": "contourcarpet" } ], "contour": [ { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" }, "colorscale": [ [ 0.0, "#440154" ], [ 0.1111111111111111, "#482878" ], [ 0.2222222222222222, "#3e4989" ], [ 0.3333333333333333, "#31688e" ], [ 0.4444444444444444, "#26828e" ], [ 0.5555555555555556, "#1f9e89" ], [ 0.6666666666666666, "#35b779" ], [ 0.7777777777777778, "#6ece58" ], [ 0.8888888888888888, "#b5de2b" ], [ 1.0, "#fde725" ] ], "type": "contour" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" }, "colorscale": [ [ 0.0, "#440154" ], [ 0.1111111111111111, "#482878" ], [ 0.2222222222222222, "#3e4989" ], [ 0.3333333333333333, "#31688e" ], [ 0.4444444444444444, "#26828e" ], [ 0.5555555555555556, "#1f9e89" ], [ 0.6666666666666666, "#35b779" ], [ 0.7777777777777778, "#6ece58" ], [ 0.8888888888888888, "#b5de2b" ], [ 1.0, "#fde725" ] ], "type": "heatmapgl" } ], "heatmap": [ { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" }, "colorscale": [ [ 0.0, "#440154" ], [ 0.1111111111111111, "#482878" ], [ 0.2222222222222222, "#3e4989" ], [ 0.3333333333333333, "#31688e" ], [ 0.4444444444444444, "#26828e" ], [ 0.5555555555555556, "#1f9e89" ], [ 0.6666666666666666, "#35b779" ], [ 0.7777777777777778, "#6ece58" ], [ 0.8888888888888888, "#b5de2b" ], [ 1.0, "#fde725" ] ], "type": "heatmap" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" }, "colorscale": [ [ 0.0, "#440154" ], [ 0.1111111111111111, "#482878" ], [ 0.2222222222222222, "#3e4989" ], [ 0.3333333333333333, "#31688e" ], [ 0.4444444444444444, "#26828e" ], [ 0.5555555555555556, "#1f9e89" ], [ 0.6666666666666666, "#35b779" ], [ 0.7777777777777778, "#6ece58" ], [ 0.8888888888888888, "#b5de2b" ], [ 1.0, "#fde725" ] ], "type": "histogram2dcontour" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" }, "colorscale": [ [ 0.0, "#440154" ], [ 0.1111111111111111, "#482878" ], [ 0.2222222222222222, "#3e4989" ], [ 0.3333333333333333, "#31688e" ], [ 0.4444444444444444, "#26828e" ], [ 0.5555555555555556, "#1f9e89" ], [ 0.6666666666666666, "#35b779" ], [ 0.7777777777777778, "#6ece58" ], [ 0.8888888888888888, "#b5de2b" ], [ 1.0, "#fde725" ] ], "type": "histogram2d" } ], "histogram": [ { "marker": { "line": { "color": "white", "width": 0.6 } }, "type": "histogram" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "marker": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "type": "scattermapbox" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "type": "scatterpolargl" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "type": "scatterpolar" } ], "scatter": [ { "fillpattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" }, "colorscale": [ [ 0.0, "#440154" ], [ 0.1111111111111111, "#482878" ], [ 0.2222222222222222, "#3e4989" ], [ 0.3333333333333333, "#31688e" ], [ 0.4444444444444444, "#26828e" ], [ 0.5555555555555556, "#1f9e89" ], [ 0.6666666666666666, "#35b779" ], [ 0.7777777777777778, "#6ece58" ], [ 0.8888888888888888, "#b5de2b" ], [ 1.0, "#fde725" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "rgb(237,237,237)" }, "line": { "color": "white" } }, "header": { "fill": { "color": "rgb(217,217,217)" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 1, "tickcolor": "rgb(36,36,36)", "ticks": "outside" } }, "colorscale": { "diverging": [ [ 0.0, "rgb(103,0,31)" ], [ 0.1, "rgb(178,24,43)" ], [ 0.2, "rgb(214,96,77)" ], [ 0.3, "rgb(244,165,130)" ], [ 0.4, "rgb(253,219,199)" ], [ 0.5, "rgb(247,247,247)" ], [ 0.6, "rgb(209,229,240)" ], [ 0.7, "rgb(146,197,222)" ], [ 0.8, "rgb(67,147,195)" ], [ 0.9, "rgb(33,102,172)" ], [ 1.0, "rgb(5,48,97)" ] ], "sequential": [ [ 0.0, "#440154" ], [ 0.1111111111111111, "#482878" ], [ 0.2222222222222222, "#3e4989" ], [ 0.3333333333333333, "#31688e" ], [ 0.4444444444444444, "#26828e" ], [ 0.5555555555555556, "#1f9e89" ], [ 0.6666666666666666, "#35b779" ], [ 0.7777777777777778, "#6ece58" ], [ 0.8888888888888888, "#b5de2b" ], [ 1.0, "#fde725" ] ], "sequentialminus": [ [ 0.0, "#440154" ], [ 0.1111111111111111, "#482878" ], [ 0.2222222222222222, "#3e4989" ], [ 0.3333333333333333, "#31688e" ], [ 0.4444444444444444, "#26828e" ], [ 0.5555555555555556, "#1f9e89" ], [ 0.6666666666666666, "#35b779" ], [ 0.7777777777777778, "#6ece58" ], [ 0.8888888888888888, "#b5de2b" ], [ 1.0, "#fde725" ] ] }, "colorway": [ "#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD", "#8C564B", "#E377C2", "#7F7F7F", "#BCBD22", "#17BECF" ], "font": { "color": "rgb(36,36,36)" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "white", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "white", "polar": { "angularaxis": { "gridcolor": "rgb(232,232,232)", "linecolor": "rgb(36,36,36)", "showgrid": false, "showline": true, "ticks": "outside" }, "bgcolor": "white", "radialaxis": { "gridcolor": "rgb(232,232,232)", "linecolor": "rgb(36,36,36)", "showgrid": false, "showline": true, "ticks": "outside" } }, "scene": { "xaxis": { "backgroundcolor": "white", "gridcolor": "rgb(232,232,232)", "gridwidth": 2, "linecolor": "rgb(36,36,36)", "showbackground": true, "showgrid": false, "showline": true, "ticks": "outside", "zeroline": false, "zerolinecolor": "rgb(36,36,36)" }, "yaxis": { "backgroundcolor": "white", "gridcolor": "rgb(232,232,232)", "gridwidth": 2, "linecolor": "rgb(36,36,36)", "showbackground": true, "showgrid": false, "showline": true, "ticks": "outside", "zeroline": false, "zerolinecolor": "rgb(36,36,36)" }, "zaxis": { "backgroundcolor": "white", "gridcolor": "rgb(232,232,232)", "gridwidth": 2, "linecolor": "rgb(36,36,36)", "showbackground": true, "showgrid": false, "showline": true, "ticks": "outside", "zeroline": false, "zerolinecolor": "rgb(36,36,36)" } }, "shapedefaults": { "fillcolor": "black", "line": { "width": 0 }, "opacity": 0.3 }, "ternary": { "aaxis": { "gridcolor": "rgb(232,232,232)", "linecolor": "rgb(36,36,36)", "showgrid": false, "showline": true, "ticks": "outside" }, "baxis": { "gridcolor": "rgb(232,232,232)", "linecolor": "rgb(36,36,36)", "showgrid": false, "showline": true, "ticks": "outside" }, "bgcolor": "white", "caxis": { "gridcolor": "rgb(232,232,232)", "linecolor": "rgb(36,36,36)", "showgrid": false, "showline": true, "ticks": "outside" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "rgb(232,232,232)", "linecolor": "rgb(36,36,36)", "showgrid": false, "showline": true, "ticks": "outside", "title": { "standoff": 15 }, "zeroline": false, "zerolinecolor": "rgb(36,36,36)" }, "yaxis": { "automargin": true, "gridcolor": "rgb(232,232,232)", "linecolor": "rgb(36,36,36)", "showgrid": false, "showline": true, "ticks": "outside", "title": { "standoff": 15 }, "zeroline": false, "zerolinecolor": "rgb(36,36,36)" } } }, "title": { "text": "" }, "font": { "size": 10, "color": "gray" }, "margin": { "l": 5, "r": 5, "b": 10, "t": 10, "pad": 4 }, "legend": { "font": { "color": "black" }, "yanchor": "top", "y": 0.99, "xanchor": "left", "x": -0.4 }, "autosize": true, "height": 70, "width": 500, "annotations": [ { "showarrow": false, "text": "Trait-like", "x": 0.0, "y": 1.1, "font": { "size": 12, "color": "black" } }, { "showarrow": false, "text": "State-like", "x": 0.72, "y": 1.1, "font": { "size": 12, "color": "black" } } ] }, "config": { "plotlyServerURL": "https://plot.ly" } }, "text/html": "
" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import plotly.graph_objects as go\n", "from plotly.subplots import make_subplots\n", "\n", "labels = [\"Replicable\", \"Not Replicable\"]\n", "\n", "# Create subplots: use 'domain' type for Pie subplot\n", "fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])\n", "fig.add_trace(go.Pie(labels=labels, values=[15+4+2+1+1, 2+1+0+0+4], name=\"Trait-like\", marker_colors=['green', 'red'], rotation=-90),\n", " 1, 1)\n", "fig.add_trace(go.Pie(labels=labels, values=[6, 17], name=\"State-like\", rotation=90),\n", " 1, 2)\n", "\n", "# Use `hole` to create a donut-like pie chart\n", "fig.update_traces(hole=.5, hoverinfo=\"label+percent+name\")\n", "\n", "fig.update_layout(\n", " template='simple_white',\n", " title_text=\"\",\n", " font=dict(\n", " size=10,\n", " color=\"gray\"),\n", " autosize=True,\n", " height=70,\n", " width=500,\n", " margin=dict(\n", " l=5,\n", " r=5,\n", " b=10,\n", " t=10,\n", " pad=4),\n", " legend=dict(\n", " yanchor=\"top\",\n", " y=0.99,\n", " xanchor=\"left\",\n", " x=-0.4,\n", " font_color='black'\n", " ),\n", " # Add annotations in the center of the donut pies.\n", " annotations=[dict(text='Trait-like', x=0.0, y=1.1, font_size=12, font_color='black', showarrow=False),\n", " dict(text='State-like', x=0.72, y=1.1, font_size=12, font_color='black', showarrow=False)])\n", "fig.show()\n", "import plotly.io as pio\n", "pio.write_html(fig, file='docs/_includes/pie.html', auto_open=False)" ] }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [], "metadata": { "collapsed": false } } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" } }, "nbformat": 4, "nbformat_minor": 2 }