{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Permutation Tests" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# boilerplate\n", "%matplotlib inline\n", "import math\n", "import numpy as np\n", "import scipy as sp\n", "import pandas as pd\n", "from scipy import stats # distributions\n", "from scipy import special # special functions\n", "from scipy import random # random variables, distributions, etc.\n", "from scipy.optimize import brentq\n", "from scipy.stats import (binom, hypergeom)\n", "import matplotlib.pyplot as plt\n", "from ipywidgets import widgets\n", "\n", "khazan_fn = './Data/khazanEtal20.csv'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDClassGenderAgenumber.online.courses.takenTAQuestionLikert.ScoreThemeScoreNegPos
013female213JesseFacilitated.learning4teaching1
113female213JesseProvided.helpful.feedback5teaching2
213female213JesseIs.an.expert4knowledge1
313female213JesseGraded.in.a.timely.manner5professional2
413female213JesseGraded.Fairly5teaching2
\n", "
" ], "text/plain": [ " ID Class Gender Age number.online.courses.taken TA \\\n", "0 1 3 female 21 3 Jesse \n", "1 1 3 female 21 3 Jesse \n", "2 1 3 female 21 3 Jesse \n", "3 1 3 female 21 3 Jesse \n", "4 1 3 female 21 3 Jesse \n", "\n", " Question Likert.Score Theme ScoreNegPos \n", "0 Facilitated.learning 4 teaching 1 \n", "1 Provided.helpful.feedback 5 teaching 2 \n", "2 Is.an.expert 4 knowledge 1 \n", "3 Graded.in.a.timely.manner 5 professional 2 \n", "4 Graded.Fairly 5 teaching 2 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(khazan_fn)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "qs = df['Question'].unique()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "for q in qs:\n", " mask = df['Question'] == q\n", " df.loc[mask,q] = df[mask]['Likert.Score']" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "df = df.drop(columns=['Class','Age','number.online.courses.taken','Question','Likert.Score',\\\n", " 'Theme','ScoreNegPos'])\n", "df = df.set_index('ID')\n", "df['Did.NOT.respond.to.email.promptly'] = -df['Did.NOT.respond.to.email.promptly']" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GenderTAFacilitated.learningProvided.helpful.feedbackIs.an.expertGraded.in.a.timely.mannerGraded.FairlyDid.NOT.respond.to.email.promptlyKnowledgable.of.course.contentHelpful.feedback.vias.Canvas.discussionConsistently.fulfilled.responsibilitiesConsiderate.in.communicationTreated.me.with.respectEnthusiasticProfessionalTA.again
ID
1femaleJesse4.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1femaleJesseNaN5.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1femaleJesseNaNNaN4.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1femaleJesseNaNNaNNaN5.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1femaleJesseNaNNaNNaNNaN5.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " Gender TA Facilitated.learning Provided.helpful.feedback \\\n", "ID \n", "1 female Jesse 4.0 NaN \n", "1 female Jesse NaN 5.0 \n", "1 female Jesse NaN NaN \n", "1 female Jesse NaN NaN \n", "1 female Jesse NaN NaN \n", "\n", " Is.an.expert Graded.in.a.timely.manner Graded.Fairly \\\n", "ID \n", "1 NaN NaN NaN \n", "1 NaN NaN NaN \n", "1 4.0 NaN NaN \n", "1 NaN 5.0 NaN \n", "1 NaN NaN 5.0 \n", "\n", " Did.NOT.respond.to.email.promptly Knowledgable.of.course.content \\\n", "ID \n", "1 NaN NaN \n", "1 NaN NaN \n", "1 NaN NaN \n", "1 NaN NaN \n", "1 NaN NaN \n", "\n", " Helpful.feedback.vias.Canvas.discussion \\\n", "ID \n", "1 NaN \n", "1 NaN \n", "1 NaN \n", "1 NaN \n", "1 NaN \n", "\n", " Consistently.fulfilled.responsibilities Considerate.in.communication \\\n", "ID \n", "1 NaN NaN \n", "1 NaN NaN \n", "1 NaN NaN \n", "1 NaN NaN \n", "1 NaN NaN \n", "\n", " Treated.me.with.respect Enthusiastic Professional TA.again \n", "ID \n", "1 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDTAFacilitated.learningProvided.helpful.feedbackIs.an.expertGraded.in.a.timely.mannerGraded.FairlyDid.NOT.respond.to.email.promptlyKnowledgable.of.course.contentHelpful.feedback.vias.Canvas.discussionConsistently.fulfilled.responsibilitiesConsiderate.in.communicationTreated.me.with.respectEnthusiasticProfessionalTA.again
01Jesse4.05.04.05.05.0-5.05.05.05.05.05.05.05.04.0
12Jesse4.05.05.04.04.0-2.04.03.05.05.05.05.05.05.0
23Jesse5.05.05.05.05.0-5.05.05.05.05.05.05.05.05.0
34Jesse4.04.04.04.04.0-4.05.04.04.04.05.04.05.04.0
45Emily4.04.03.04.02.0-4.03.03.05.03.03.03.04.03.0
\n", "
" ], "text/plain": [ " ID TA Facilitated.learning Provided.helpful.feedback Is.an.expert \\\n", "0 1 Jesse 4.0 5.0 4.0 \n", "1 2 Jesse 4.0 5.0 5.0 \n", "2 3 Jesse 5.0 5.0 5.0 \n", "3 4 Jesse 4.0 4.0 4.0 \n", "4 5 Emily 4.0 4.0 3.0 \n", "\n", " Graded.in.a.timely.manner Graded.Fairly \\\n", "0 5.0 5.0 \n", "1 4.0 4.0 \n", "2 5.0 5.0 \n", "3 4.0 4.0 \n", "4 4.0 2.0 \n", "\n", " Did.NOT.respond.to.email.promptly Knowledgable.of.course.content \\\n", "0 -5.0 5.0 \n", "1 -2.0 4.0 \n", "2 -5.0 5.0 \n", "3 -4.0 5.0 \n", "4 -4.0 3.0 \n", "\n", " Helpful.feedback.vias.Canvas.discussion \\\n", "0 5.0 \n", "1 3.0 \n", "2 5.0 \n", "3 4.0 \n", "4 3.0 \n", "\n", " Consistently.fulfilled.responsibilities Considerate.in.communication \\\n", "0 5.0 5.0 \n", "1 5.0 5.0 \n", "2 5.0 5.0 \n", "3 4.0 4.0 \n", "4 5.0 3.0 \n", "\n", " Treated.me.with.respect Enthusiastic Professional TA.again \n", "0 5.0 5.0 5.0 4.0 \n", "1 5.0 5.0 5.0 5.0 \n", "2 5.0 5.0 5.0 5.0 \n", "3 5.0 4.0 5.0 4.0 \n", "4 3.0 3.0 4.0 3.0 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "agg_dict = {'TA': 'first'}\n", "for q in qs:\n", " agg_dict[q] = np.nansum\n", "scores = df.groupby('ID').agg(agg_dict).reset_index()\n", "scores.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDTAFacilitated.learningProvided.helpful.feedbackIs.an.expertGraded.in.a.timely.mannerGraded.FairlyDid.NOT.respond.to.email.promptlyKnowledgable.of.course.contentHelpful.feedback.vias.Canvas.discussionConsistently.fulfilled.responsibilitiesConsiderate.in.communicationTreated.me.with.respectEnthusiasticProfessionalTA.again
45Emily4.04.03.04.02.0-4.03.03.05.03.03.03.04.03.0
56Emily3.04.03.04.04.0-3.04.03.04.03.03.03.03.04.0
67Emily4.05.04.05.03.0-5.04.04.04.04.04.04.05.03.0
1011Emily3.03.04.03.02.0-3.03.03.05.05.05.05.03.01.0
1112Emily5.05.05.05.05.0-5.05.05.05.05.05.05.05.05.0
\n", "
" ], "text/plain": [ " ID TA Facilitated.learning Provided.helpful.feedback Is.an.expert \\\n", "4 5 Emily 4.0 4.0 3.0 \n", "5 6 Emily 3.0 4.0 3.0 \n", "6 7 Emily 4.0 5.0 4.0 \n", "10 11 Emily 3.0 3.0 4.0 \n", "11 12 Emily 5.0 5.0 5.0 \n", "\n", " Graded.in.a.timely.manner Graded.Fairly \\\n", "4 4.0 2.0 \n", "5 4.0 4.0 \n", "6 5.0 3.0 \n", "10 3.0 2.0 \n", "11 5.0 5.0 \n", "\n", " Did.NOT.respond.to.email.promptly Knowledgable.of.course.content \\\n", "4 -4.0 3.0 \n", "5 -3.0 4.0 \n", "6 -5.0 4.0 \n", "10 -3.0 3.0 \n", "11 -5.0 5.0 \n", "\n", " Helpful.feedback.vias.Canvas.discussion \\\n", "4 3.0 \n", "5 3.0 \n", "6 4.0 \n", "10 3.0 \n", "11 5.0 \n", "\n", " Consistently.fulfilled.responsibilities Considerate.in.communication \\\n", "4 5.0 3.0 \n", "5 4.0 3.0 \n", "6 4.0 4.0 \n", "10 5.0 5.0 \n", "11 5.0 5.0 \n", "\n", " Treated.me.with.respect Enthusiastic Professional TA.again \n", "4 3.0 3.0 4.0 3.0 \n", "5 3.0 3.0 3.0 4.0 \n", "6 4.0 4.0 5.0 3.0 \n", "10 5.0 5.0 3.0 1.0 \n", "11 5.0 5.0 5.0 5.0 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mask = (scores['TA'] == 'Jesse')\n", "jesse = scores.loc[mask].copy()\n", "emily = scores.loc[~mask].copy()\n", "emily.head()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " ID Facilitated.learning Provided.helpful.feedback \\\n", "count 66.000000 55.000000 55.000000 \n", "mean 48.878788 4.018182 4.200000 \n", "std 35.912507 1.146507 1.128749 \n", "min 0.000000 1.000000 1.000000 \n", "25% 13.750000 3.500000 4.000000 \n", "50% 53.000000 4.000000 5.000000 \n", "75% 76.250000 5.000000 5.000000 \n", "max 111.000000 5.000000 5.000000 \n", "\n", " Is.an.expert Graded.in.a.timely.manner Graded.Fairly \\\n", "count 55.000000 55.000000 55.000000 \n", "mean 4.018182 4.309091 3.981818 \n", "std 1.113734 0.920401 1.146507 \n", "min 1.000000 1.000000 1.000000 \n", "25% 3.500000 4.000000 3.500000 \n", "50% 4.000000 5.000000 4.000000 \n", "75% 5.000000 5.000000 5.000000 \n", "max 5.000000 5.000000 5.000000 \n", "\n", " Did.NOT.respond.to.email.promptly Knowledgable.of.course.content \\\n", "count 55.000000 55.000000 \n", "mean -3.909091 4.400000 \n", "std 1.251262 0.852013 \n", "min -5.000000 1.000000 \n", "25% -5.000000 4.000000 \n", "50% -4.000000 5.000000 \n", "75% -3.000000 5.000000 \n", "max -1.000000 5.000000 \n", "\n", " Helpful.feedback.vias.Canvas.discussion \\\n", "count 55.000000 \n", "mean 4.000000 \n", "std 1.122167 \n", "min 1.000000 \n", "25% 3.000000 \n", "50% 4.000000 \n", "75% 5.000000 \n", "max 5.000000 \n", "\n", " Consistently.fulfilled.responsibilities Considerate.in.communication \\\n", "count 55.000000 55.000000 \n", "mean 4.600000 4.509091 \n", "std 0.735351 0.857920 \n", "min 2.000000 1.000000 \n", "25% 4.000000 4.000000 \n", "50% 5.000000 5.000000 \n", "75% 5.000000 5.000000 \n", "max 5.000000 5.000000 \n", "\n", " Treated.me.with.respect Enthusiastic Professional TA.again \n", "count 55.000000 55.000000 55.000000 55.000000 \n", "mean 4.563636 4.309091 4.600000 3.800000 \n", "std 0.787956 1.034099 0.807373 1.176939 \n", "min 2.000000 1.000000 2.000000 1.000000 \n", "25% 4.000000 4.000000 5.000000 3.000000 \n", "50% 5.000000 5.000000 5.000000 4.000000 \n", "75% 5.000000 5.000000 5.000000 5.000000 \n", "max 5.000000 5.000000 5.000000 5.000000 ID Facilitated.learning Provided.helpful.feedback \\\n", "count 70.000000 60.000000 60.000000 \n", "mean 49.200000 3.966667 4.266667 \n", "std 38.566938 0.882344 1.006195 \n", "min 0.000000 1.000000 1.000000 \n", "25% 14.250000 3.750000 4.000000 \n", "50% 44.500000 4.000000 5.000000 \n", "75% 83.750000 5.000000 5.000000 \n", "max 115.000000 5.000000 5.000000 \n", "\n", " Is.an.expert Graded.in.a.timely.manner Graded.Fairly \\\n", "count 60.000000 60.000000 60.000000 \n", "mean 4.000000 4.483333 4.066667 \n", "std 0.802538 0.700887 1.087162 \n", "min 2.000000 2.000000 1.000000 \n", "25% 3.000000 4.000000 4.000000 \n", "50% 4.000000 5.000000 4.000000 \n", "75% 5.000000 5.000000 5.000000 \n", "max 5.000000 5.000000 5.000000 \n", "\n", " Did.NOT.respond.to.email.promptly Knowledgable.of.course.content \\\n", "count 60.000000 60.000000 \n", "mean -4.066667 4.366667 \n", "std 1.205449 0.735692 \n", "min -5.000000 3.000000 \n", "25% -5.000000 4.000000 \n", "50% -5.000000 5.000000 \n", "75% -3.000000 5.000000 \n", "max -1.000000 5.000000 \n", "\n", " Helpful.feedback.vias.Canvas.discussion \\\n", "count 60.000000 \n", "mean 4.133333 \n", "std 0.947193 \n", "min 2.000000 \n", "25% 3.750000 \n", "50% 4.000000 \n", "75% 5.000000 \n", "max 5.000000 \n", "\n", " Consistently.fulfilled.responsibilities Considerate.in.communication \\\n", "count 60.000000 60.000000 \n", "mean 4.533333 4.533333 \n", "std 0.724081 0.700282 \n", "min 2.000000 2.000000 \n", "25% 4.000000 4.000000 \n", "50% 5.000000 5.000000 \n", "75% 5.000000 5.000000 \n", "max 5.000000 5.000000 \n", "\n", " Treated.me.with.respect Enthusiastic Professional TA.again \n", "count 60.000000 60.000000 60.000000 60.000000 \n", "mean 4.666667 4.183333 4.750000 3.950000 \n", "std 0.680644 0.892372 0.571202 1.048405 \n", "min 2.000000 2.000000 3.000000 1.000000 \n", "25% 5.000000 3.000000 5.000000 3.000000 \n", "50% 5.000000 4.000000 5.000000 4.000000 \n", "75% 5.000000 5.000000 5.000000 5.000000 \n", "max 5.000000 5.000000 5.000000 5.000000 \n" ] } ], "source": [ "# there were 11 nonresponders for \"Emily\" and 10 for \"Jesse\"\n", "emily_missing = {'ID':0, 'TA':'Emily' }\n", "jesse_missing = {'ID':0, 'TA':'Jesse' }\n", "for q in qs:\n", " emily_missing[q] = np.nan\n", " jesse_missing[q] = np.nan\n", "for i in range(11):\n", " emily = emily.append(emily_missing, ignore_index=True)\n", "for i in range(10):\n", " jesse = jesse.append(jesse_missing, ignore_index=True)\n", "print(emily.describe(), jesse.describe())" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from scipy.stats import norm, rankdata\n", "from cryptorandom.sample import random_sample\n", "from cryptorandom.cryptorandom import SHA256\n", "from permute.utils import get_prng, permute\n", "prng = SHA256(1234567890)\n", "\n", "def abs_mean_diff(x, y):\n", " return np.abs(np.nanmean(x)-np.nanmean(y))\n", "\n", "def mean_diff(x, y):\n", " return np.nanmean(x)-np.nanmean(y)\n", "\n", "def sim_npc(X, Y, cols, test_fn, combine=\"fisher\", prng=None, reps=int(10**4), verbose=False):\n", " ts = {}\n", " tv = {}\n", " ps = {}\n", " XY = pd.concat([X,Y])\n", " nx = len(X[cols[0]])\n", " n = len(XY[cols[0]])\n", " all_i = set(range(n))\n", " for c in cols:\n", " ts[c] = test_fn(X[c], Y[c])\n", " tv[c] = []\n", " if verbose:\n", " print('\\nn: {} nx: {} ts:{}\\n'.format(n, nx, ts))\n", " for i in range(reps):\n", " inx = random_sample(n, size=nx, replace=False, prng=prng) \n", " iny = list(all_i - set(inx)) \n", " for c in cols:\n", " tv[c].append(test_fn(XY.iloc[inx][c], XY.iloc[iny][c]))\n", " if verbose and i%(int(reps/10)) == 0:\n", " print(i, [(np.sum(np.array(tv[c]) >= ts[c]) + 1)/(i+2) for c in cols]) \n", " for c in cols:\n", " ps[c] = (np.sum(np.array(tv[c]) >= ts[c]) + 1)/(reps+1) \n", " dist = np.array([tv[c] for c in cols]).T\n", " dist = np.append(dist, np.array([ts[c] for c in cols], ndmin=2), axis=0)\n", " p = npc(np.array([ps[c] for c in cols]), dist, combine=combine)\n", " return p, ts, ps\n", "\n", "def npc(pvalues, distr, combine=\"fisher\", plus1=True):\n", " r\"\"\"\n", " Combines p-values from individual partial test hypotheses $H_{0i}$ against\n", " $H_{1i}$, $i=1,\\dots,n$ to test the global null hypothesis\n", " .. math:: \\cap_{i=1}^n H_{0i}\n", " against the alternative\n", " .. math:: \\cup_{i=1}^n H_{1i}\n", " using an omnibus test statistic.\n", " Parameters\n", " ----------\n", " pvalues : array_like\n", " Array of p-values to combine\n", " distr : array_like\n", " Array of dimension [B, n] where B is the number of permutations and n is\n", " the number of partial hypothesis tests. The $i$th column of distr contains\n", " the simulated null distribution of the $i$th test statistic under $H_{0i}$.\n", " combine : {'fisher', 'liptak', 'tippett'} or function\n", " The combining function to use. Default is \"fisher\".\n", " Valid combining functions must take in p-values as their argument and be\n", " monotonically decreasing in each p-value.\n", " plus1 : bool\n", " flag for whether to add 1 to the numerator and denominator of the\n", " p-value based on the empirical permutation distribution. \n", " Default is True.\n", " Returns\n", " -------\n", " float\n", " A single p-value for the global test\n", " \"\"\"\n", " n = len(pvalues)\n", " B = distr.shape[0]\n", " if n < 2:\n", " raise ValueError(\"One p-value: nothing to combine!\")\n", " if n != distr.shape[1]:\n", " raise ValueError(\"Mismatch in number of p-values and size of distr\")\n", "\n", " combine_library = {\n", " \"fisher\": fisher,\n", " \"liptak\": liptak,\n", " \"tippett\": tippett\n", " }\n", " if callable(combine):\n", " if not check_combfunc_monotonic(pvalues, combine):\n", " raise ValueError(\n", " \"Bad combining function: must be monotonically decreasing in each p-value\")\n", " combine_func = combine\n", " else:\n", " combine_func = combine_library[combine]\n", "\n", " # Convert test statistic distribution to p-values\n", " combined_stat_distr = [0] * B\n", " pvalues_from_distr = np.zeros((B, n))\n", " for j in range(n):\n", " pvalues_from_distr[:, j] = 1 - rankdata(distr[:, j], method=\"min\")/(plus1+B) + (1 + plus1)/(plus1+B)\n", " if combine == \"liptak\":\n", " toobig = np.where(pvalues_from_distr >= 1)\n", " pvalues_from_distr[toobig] = 1 - np.finfo(float).eps\n", " combined_stat_distr = np.apply_along_axis(\n", " combine_func, 1, pvalues_from_distr)\n", "\n", " observed_combined_stat = combine_func(pvalues)\n", " return (plus1 + np.sum(combined_stat_distr >= observed_combined_stat)) / (plus1+B)\n", "\n", "def fisher(pvalues):\n", " r\"\"\"\n", " Apply Fisher's combining function\n", " .. math:: -2 \\sum_i \\log(p_i)\n", " Parameters\n", " ----------\n", " pvalues : array_like\n", " Array of p-values to combine\n", " Returns\n", " -------\n", " float\n", " Fisher's combined test statistic\n", " \"\"\"\n", " return -2*np.log(np.prod(pvalues))\n", "\n", "def liptak(pvalues):\n", " r\"\"\"\n", " Apply Liptak's combining function\n", " .. math:: \\sum_i \\Phi^{-1}(1-p_i)\n", " where $\\Phi^{-1}$ is the inverse CDF of the standard normal distribution.\n", " Parameters\n", " ----------\n", " pvalues : array_like\n", " Array of p-values to combine\n", " Returns\n", " -------\n", " float\n", " Liptak's combined test statistic\n", " \"\"\"\n", " return np.sum(norm.ppf(1 - pvalues))\n", "\n", "\n", "def tippett(pvalues):\n", " r\"\"\"\n", " Apply Tippett's combining function\n", " .. math:: \\max_i \\{1-p_i\\}\n", " Parameters\n", " ----------\n", " pvalues : array_like\n", " Array of p-values to combine\n", " Returns\n", " -------\n", " float\n", " Tippett's combined test statistic\n", " \"\"\"\n", " return np.max(1 - pvalues)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "n: 136 nx: 70 ts:{'Facilitated.learning': 0.051515151515151736, 'Provided.helpful.feedback': 0.06666666666666643, 'Is.an.expert': 0.01818181818181852, 'Graded.in.a.timely.manner': 0.17424242424242387, 'Graded.Fairly': 0.0848484848484845, 'Did.NOT.respond.to.email.promptly': 0.15757575757575726, 'Knowledgable.of.course.content': 0.0333333333333341, 'Helpful.feedback.vias.Canvas.discussion': 0.13333333333333375, 'Consistently.fulfilled.responsibilities': 0.06666666666666643, 'Considerate.in.communication': 0.0242424242424244, 'Treated.me.with.respect': 0.10303030303030347, 'Enthusiastic': 0.12575757575757596, 'Professional': 0.15000000000000036, 'TA.again': 0.15000000000000036}\n", "\n", "0 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 1.0, 0.5, 0.5, 0.5, 1.0]\n", "1000 [0.8303393213572854, 0.7255489021956087, 0.8892215568862275, 0.27245508982035926, 0.7125748502994012, 0.4540918163672655, 0.8293413173652695, 0.49101796407185627, 0.6307385229540918, 0.8982035928143712, 0.4600798403193613, 0.49001996007984033, 0.27345309381237526, 0.47305389221556887]\n", "2000 [0.8361638361638362, 0.7372627372627373, 0.8901098901098901, 0.26823176823176825, 0.7207792207792207, 0.4515484515484515, 0.8416583416583416, 0.5154845154845155, 0.6213786213786214, 0.8931068931068931, 0.476023976023976, 0.494005994005994, 0.27672327672327673, 0.4825174825174825]\n", "3000 [0.8397734843437709, 0.7368421052631579, 0.8884077281812125, 0.2731512325116589, 0.7301798800799467, 0.4563624250499667, 0.8394403730846103, 0.5163224516988674, 0.6332445036642238, 0.896402398401066, 0.4703530979347102, 0.5036642238507661, 0.2741505662891406, 0.4793471019320453]\n", "4000 [0.8320839580209896, 0.7348825587206397, 0.8858070964517741, 0.276111944027986, 0.7311344327836082, 0.463768115942029, 0.8340829585207397, 0.515992003998001, 0.6326836581709145, 0.8958020989505248, 0.46676661669165415, 0.5002498750624688, 0.26936531734132935, 0.47976011994003]\n", "5000 [0.8310675729708117, 0.7313074770091963, 0.8816473410635746, 0.2720911635345862, 0.727109156337465, 0.469812075169932, 0.8348660535785686, 0.5179928028788484, 0.632546981207517, 0.8968412634946021, 0.468812475009996, 0.49780087964814074, 0.2694922031187525, 0.476609356257497]\n", "6000 [0.8328890369876708, 0.73458847050983, 0.8815394868377208, 0.273408863712096, 0.7240919693435521, 0.47184271909363545, 0.8333888703765412, 0.5178273908697101, 0.6344551816061312, 0.8970343218927024, 0.46984338553815397, 0.5009996667777408, 0.26824391869376873, 0.475674775074975]\n", "7000 [0.8341902313624678, 0.7356469580119965, 0.8797486432447872, 0.268494715795487, 0.7233647529277349, 0.4721508140531277, 0.8314767209368752, 0.5187089403027706, 0.6359611539560126, 0.8968866038274779, 0.4705798343330477, 0.5008568980291346, 0.2707797772065124, 0.4777206512425021]\n", "8000 [0.8359160209947513, 0.7345663584103974, 0.8804048987753061, 0.2693076730817296, 0.725568607848038, 0.47100724818795303, 0.83104223944014, 0.5168707823044238, 0.6369657585603599, 0.8979005248687828, 0.46838290427393153, 0.5029992501874532, 0.26905773556610846, 0.4783804048987753]\n", "9000 [0.8373694734503444, 0.7347256165296601, 0.8813596978449233, 0.2684958898022662, 0.7237280604310153, 0.4723394801155299, 0.8317040657631637, 0.5125527660519884, 0.6385247722728282, 0.8965785381026439, 0.4670073317040658, 0.503332592757165, 0.2707176183070429, 0.47833814707842703]\n" ] }, { "data": { "text/plain": [ "(0.9065186962607479,\n", " {'Facilitated.learning': 0.051515151515151736,\n", " 'Provided.helpful.feedback': 0.06666666666666643,\n", " 'Is.an.expert': 0.01818181818181852,\n", " 'Graded.in.a.timely.manner': 0.17424242424242387,\n", " 'Graded.Fairly': 0.0848484848484845,\n", " 'Did.NOT.respond.to.email.promptly': 0.15757575757575726,\n", " 'Knowledgable.of.course.content': 0.0333333333333341,\n", " 'Helpful.feedback.vias.Canvas.discussion': 0.13333333333333375,\n", " 'Consistently.fulfilled.responsibilities': 0.06666666666666643,\n", " 'Considerate.in.communication': 0.0242424242424244,\n", " 'Treated.me.with.respect': 0.10303030303030347,\n", " 'Enthusiastic': 0.12575757575757596,\n", " 'Professional': 0.15000000000000036,\n", " 'TA.again': 0.15000000000000036},\n", " {'Facilitated.learning': 0.835016498350165,\n", " 'Provided.helpful.feedback': 0.7364263573642635,\n", " 'Is.an.expert': 0.8813118688131187,\n", " 'Graded.in.a.timely.manner': 0.2696730326967303,\n", " 'Graded.Fairly': 0.7234276572342766,\n", " 'Did.NOT.respond.to.email.promptly': 0.47305269473052697,\n", " 'Knowledgable.of.course.content': 0.833916608339166,\n", " 'Helpful.feedback.vias.Canvas.discussion': 0.5122487751224878,\n", " 'Consistently.fulfilled.responsibilities': 0.6366363363663634,\n", " 'Considerate.in.communication': 0.8963103689631037,\n", " 'Treated.me.with.respect': 0.46935306469353066,\n", " 'Enthusiastic': 0.5017498250174982,\n", " 'Professional': 0.27017298270172985,\n", " 'TA.again': 0.4781521847815218})" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sim_npc(jesse, emily, qs, abs_mean_diff, combine=\"fisher\", prng=prng, \\\n", " reps=int(10**4), verbose=True)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "n: 136 nx: 66 ts:{'Facilitated.learning': 0.051515151515151736, 'Provided.helpful.feedback': -0.06666666666666643, 'Is.an.expert': 0.01818181818181852, 'Graded.in.a.timely.manner': -0.17424242424242387, 'Graded.Fairly': -0.0848484848484845, 'Did.NOT.respond.to.email.promptly': 0.15757575757575726, 'Knowledgable.of.course.content': 0.0333333333333341, 'Helpful.feedback.vias.Canvas.discussion': -0.13333333333333375, 'Consistently.fulfilled.responsibilities': 0.06666666666666643, 'Considerate.in.communication': -0.0242424242424244, 'Treated.me.with.respect': -0.10303030303030347, 'Enthusiastic': 0.12575757575757596, 'Professional': -0.15000000000000036, 'TA.again': -0.15000000000000036}\n", "\n", "0 [0.5, 1.0, 0.5, 1.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1.0, 0.5, 1.0, 0.5]\n", "1000 [0.43812375249500995, 0.6526946107784432, 0.45209580838323354, 0.8792415169660679, 0.6736526946107785, 0.23552894211576847, 0.43313373253493015, 0.7644710578842315, 0.312375249500998, 0.5898203592814372, 0.811377245508982, 0.24251497005988024, 0.9001996007984032, 0.7734530938123753]\n", "2000 [0.4160839160839161, 0.6503496503496503, 0.45204795204795206, 0.8776223776223776, 0.6618381618381618, 0.24225774225774227, 0.42507492507492506, 0.7592407592407593, 0.31218781218781216, 0.583916083916084, 0.7947052947052947, 0.24675324675324675, 0.8881118881118881, 0.7672327672327672]\n", "3000 [0.4100599600266489, 0.6465689540306462, 0.4473684210526316, 0.8704197201865423, 0.6675549633577615, 0.24550299800133243, 0.4177215189873418, 0.7551632245169887, 0.3117921385742838, 0.570286475682878, 0.7924716855429713, 0.24616922051965356, 0.8824117255163224, 0.7674883411059293]\n", "4000 [0.41179410294852575, 0.6476761619190404, 0.4535232383808096, 0.875312343828086, 0.6619190404797601, 0.24387806096951525, 0.4222888555722139, 0.7616191904047976, 0.31284357821089454, 0.5719640179910045, 0.7913543228385808, 0.24587706146926536, 0.8820589705147426, 0.7698650674662668]\n", "5000 [0.41203518592562977, 0.647141143542583, 0.45621751299480207, 0.8744502199120352, 0.6603358656537385, 0.23930427828868453, 0.4226309476209516, 0.7626949220311875, 0.3170731707317073, 0.5727708916433427, 0.789484206317473, 0.24710115953618553, 0.8798480607756898, 0.7704918032786885]\n", "6000 [0.41036321226257916, 0.6429523492169277, 0.4501832722425858, 0.8755414861712763, 0.6531156281239587, 0.2465844718427191, 0.41836054648450516, 0.7582472509163612, 0.31356214595134957, 0.5659780073308897, 0.7845718093968677, 0.2437520826391203, 0.8787070976341219, 0.7665778073975341]\n", "7000 [0.4124535846900885, 0.6452442159383034, 0.45215652670665524, 0.8774635818337618, 0.6552413596115395, 0.24478720365609827, 0.4181662382176521, 0.7587832047986289, 0.31348186232505, 0.572407883461868, 0.7863467580691231, 0.2493573264781491, 0.8803199085975436, 0.7710654098828906]\n", "8000 [0.4157710572356911, 0.6464633841539615, 0.45601099725068733, 0.8806548362909272, 0.6567108222944263, 0.2459385153711572, 0.4202699325168708, 0.7621844538865283, 0.3189202699325169, 0.5738565358660335, 0.7866783304173957, 0.252061984503874, 0.8782804298925269, 0.7736815796050988]\n", "9000 [0.4177960453232615, 0.6467451677405021, 0.4578982448344812, 0.8803599200177739, 0.6587425016662963, 0.24894467896023106, 0.42223950233281493, 0.762497222839369, 0.32015107753832484, 0.5735392135081093, 0.7862697178404799, 0.25261053099311265, 0.8773605865363253, 0.7739391246389691]\n" ] }, { "data": { "text/plain": [ "(0.7869426114777045,\n", " {'Facilitated.learning': 0.051515151515151736,\n", " 'Provided.helpful.feedback': -0.06666666666666643,\n", " 'Is.an.expert': 0.01818181818181852,\n", " 'Graded.in.a.timely.manner': -0.17424242424242387,\n", " 'Graded.Fairly': -0.0848484848484845,\n", " 'Did.NOT.respond.to.email.promptly': 0.15757575757575726,\n", " 'Knowledgable.of.course.content': 0.0333333333333341,\n", " 'Helpful.feedback.vias.Canvas.discussion': -0.13333333333333375,\n", " 'Consistently.fulfilled.responsibilities': 0.06666666666666643,\n", " 'Considerate.in.communication': -0.0242424242424244,\n", " 'Treated.me.with.respect': -0.10303030303030347,\n", " 'Enthusiastic': 0.12575757575757596,\n", " 'Professional': -0.15000000000000036,\n", " 'TA.again': -0.15000000000000036},\n", " {'Facilitated.learning': 0.41985801419858015,\n", " 'Provided.helpful.feedback': 0.6459354064593541,\n", " 'Is.an.expert': 0.45835416458354167,\n", " 'Graded.in.a.timely.manner': 0.8809119088091191,\n", " 'Graded.Fairly': 0.6594340565943405,\n", " 'Did.NOT.respond.to.email.promptly': 0.24987501249875013,\n", " 'Knowledgable.of.course.content': 0.42455754424557546,\n", " 'Helpful.feedback.vias.Canvas.discussion': 0.7619238076192381,\n", " 'Consistently.fulfilled.responsibilities': 0.32086791320867913,\n", " 'Considerate.in.communication': 0.5740425957404259,\n", " 'Treated.me.with.respect': 0.7851214878512148,\n", " 'Enthusiastic': 0.2521747825217478,\n", " 'Professional': 0.8780121987801219,\n", " 'TA.again': 0.7752224777522247})" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sim_npc(jesse, emily, qs, mean_diff, combine=\"tippett\", prng=prng, \\\n", " reps=int(10**4), verbose=True)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.203092328742278e-05" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from scipy.stats import chi2\n", "chi2.sf(-2*np.log(0.08*1.0e-5),df=4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }