{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Permutation Tests"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# boilerplate\n",
"%matplotlib inline\n",
"import math\n",
"import numpy as np\n",
"import scipy as sp\n",
"import pandas as pd\n",
"from scipy import stats # distributions\n",
"from scipy import special # special functions\n",
"from scipy import random # random variables, distributions, etc.\n",
"from scipy.optimize import brentq\n",
"from scipy.stats import (binom, hypergeom)\n",
"import matplotlib.pyplot as plt\n",
"from ipywidgets import widgets\n",
"\n",
"khazan_fn = './Data/khazanEtal20.csv'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" Class | \n",
" Gender | \n",
" Age | \n",
" number.online.courses.taken | \n",
" TA | \n",
" Question | \n",
" Likert.Score | \n",
" Theme | \n",
" ScoreNegPos | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 3 | \n",
" female | \n",
" 21 | \n",
" 3 | \n",
" Jesse | \n",
" Facilitated.learning | \n",
" 4 | \n",
" teaching | \n",
" 1 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" 3 | \n",
" female | \n",
" 21 | \n",
" 3 | \n",
" Jesse | \n",
" Provided.helpful.feedback | \n",
" 5 | \n",
" teaching | \n",
" 2 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1 | \n",
" 3 | \n",
" female | \n",
" 21 | \n",
" 3 | \n",
" Jesse | \n",
" Is.an.expert | \n",
" 4 | \n",
" knowledge | \n",
" 1 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1 | \n",
" 3 | \n",
" female | \n",
" 21 | \n",
" 3 | \n",
" Jesse | \n",
" Graded.in.a.timely.manner | \n",
" 5 | \n",
" professional | \n",
" 2 | \n",
"
\n",
" \n",
" | 4 | \n",
" 1 | \n",
" 3 | \n",
" female | \n",
" 21 | \n",
" 3 | \n",
" Jesse | \n",
" Graded.Fairly | \n",
" 5 | \n",
" teaching | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID Class Gender Age number.online.courses.taken TA \\\n",
"0 1 3 female 21 3 Jesse \n",
"1 1 3 female 21 3 Jesse \n",
"2 1 3 female 21 3 Jesse \n",
"3 1 3 female 21 3 Jesse \n",
"4 1 3 female 21 3 Jesse \n",
"\n",
" Question Likert.Score Theme ScoreNegPos \n",
"0 Facilitated.learning 4 teaching 1 \n",
"1 Provided.helpful.feedback 5 teaching 2 \n",
"2 Is.an.expert 4 knowledge 1 \n",
"3 Graded.in.a.timely.manner 5 professional 2 \n",
"4 Graded.Fairly 5 teaching 2 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv(khazan_fn)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"qs = df['Question'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"for q in qs:\n",
" mask = df['Question'] == q\n",
" df.loc[mask,q] = df[mask]['Likert.Score']"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"df = df.drop(columns=['Class','Age','number.online.courses.taken','Question','Likert.Score',\\\n",
" 'Theme','ScoreNegPos'])\n",
"df = df.set_index('ID')\n",
"df['Did.NOT.respond.to.email.promptly'] = -df['Did.NOT.respond.to.email.promptly']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Gender | \n",
" TA | \n",
" Facilitated.learning | \n",
" Provided.helpful.feedback | \n",
" Is.an.expert | \n",
" Graded.in.a.timely.manner | \n",
" Graded.Fairly | \n",
" Did.NOT.respond.to.email.promptly | \n",
" Knowledgable.of.course.content | \n",
" Helpful.feedback.vias.Canvas.discussion | \n",
" Consistently.fulfilled.responsibilities | \n",
" Considerate.in.communication | \n",
" Treated.me.with.respect | \n",
" Enthusiastic | \n",
" Professional | \n",
" TA.again | \n",
"
\n",
" \n",
" | ID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" female | \n",
" Jesse | \n",
" 4.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" female | \n",
" Jesse | \n",
" NaN | \n",
" 5.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" female | \n",
" Jesse | \n",
" NaN | \n",
" NaN | \n",
" 4.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" female | \n",
" Jesse | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 5.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" female | \n",
" Jesse | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 5.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Gender TA Facilitated.learning Provided.helpful.feedback \\\n",
"ID \n",
"1 female Jesse 4.0 NaN \n",
"1 female Jesse NaN 5.0 \n",
"1 female Jesse NaN NaN \n",
"1 female Jesse NaN NaN \n",
"1 female Jesse NaN NaN \n",
"\n",
" Is.an.expert Graded.in.a.timely.manner Graded.Fairly \\\n",
"ID \n",
"1 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"1 4.0 NaN NaN \n",
"1 NaN 5.0 NaN \n",
"1 NaN NaN 5.0 \n",
"\n",
" Did.NOT.respond.to.email.promptly Knowledgable.of.course.content \\\n",
"ID \n",
"1 NaN NaN \n",
"1 NaN NaN \n",
"1 NaN NaN \n",
"1 NaN NaN \n",
"1 NaN NaN \n",
"\n",
" Helpful.feedback.vias.Canvas.discussion \\\n",
"ID \n",
"1 NaN \n",
"1 NaN \n",
"1 NaN \n",
"1 NaN \n",
"1 NaN \n",
"\n",
" Consistently.fulfilled.responsibilities Considerate.in.communication \\\n",
"ID \n",
"1 NaN NaN \n",
"1 NaN NaN \n",
"1 NaN NaN \n",
"1 NaN NaN \n",
"1 NaN NaN \n",
"\n",
" Treated.me.with.respect Enthusiastic Professional TA.again \n",
"ID \n",
"1 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" TA | \n",
" Facilitated.learning | \n",
" Provided.helpful.feedback | \n",
" Is.an.expert | \n",
" Graded.in.a.timely.manner | \n",
" Graded.Fairly | \n",
" Did.NOT.respond.to.email.promptly | \n",
" Knowledgable.of.course.content | \n",
" Helpful.feedback.vias.Canvas.discussion | \n",
" Consistently.fulfilled.responsibilities | \n",
" Considerate.in.communication | \n",
" Treated.me.with.respect | \n",
" Enthusiastic | \n",
" Professional | \n",
" TA.again | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Jesse | \n",
" 4.0 | \n",
" 5.0 | \n",
" 4.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" -5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Jesse | \n",
" 4.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" -2.0 | \n",
" 4.0 | \n",
" 3.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" Jesse | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" -5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" Jesse | \n",
" 4.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" -4.0 | \n",
" 5.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 5.0 | \n",
" 4.0 | \n",
" 5.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" Emily | \n",
" 4.0 | \n",
" 4.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 2.0 | \n",
" -4.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
" 5.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID TA Facilitated.learning Provided.helpful.feedback Is.an.expert \\\n",
"0 1 Jesse 4.0 5.0 4.0 \n",
"1 2 Jesse 4.0 5.0 5.0 \n",
"2 3 Jesse 5.0 5.0 5.0 \n",
"3 4 Jesse 4.0 4.0 4.0 \n",
"4 5 Emily 4.0 4.0 3.0 \n",
"\n",
" Graded.in.a.timely.manner Graded.Fairly \\\n",
"0 5.0 5.0 \n",
"1 4.0 4.0 \n",
"2 5.0 5.0 \n",
"3 4.0 4.0 \n",
"4 4.0 2.0 \n",
"\n",
" Did.NOT.respond.to.email.promptly Knowledgable.of.course.content \\\n",
"0 -5.0 5.0 \n",
"1 -2.0 4.0 \n",
"2 -5.0 5.0 \n",
"3 -4.0 5.0 \n",
"4 -4.0 3.0 \n",
"\n",
" Helpful.feedback.vias.Canvas.discussion \\\n",
"0 5.0 \n",
"1 3.0 \n",
"2 5.0 \n",
"3 4.0 \n",
"4 3.0 \n",
"\n",
" Consistently.fulfilled.responsibilities Considerate.in.communication \\\n",
"0 5.0 5.0 \n",
"1 5.0 5.0 \n",
"2 5.0 5.0 \n",
"3 4.0 4.0 \n",
"4 5.0 3.0 \n",
"\n",
" Treated.me.with.respect Enthusiastic Professional TA.again \n",
"0 5.0 5.0 5.0 4.0 \n",
"1 5.0 5.0 5.0 5.0 \n",
"2 5.0 5.0 5.0 5.0 \n",
"3 5.0 4.0 5.0 4.0 \n",
"4 3.0 3.0 4.0 3.0 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agg_dict = {'TA': 'first'}\n",
"for q in qs:\n",
" agg_dict[q] = np.nansum\n",
"scores = df.groupby('ID').agg(agg_dict).reset_index()\n",
"scores.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" TA | \n",
" Facilitated.learning | \n",
" Provided.helpful.feedback | \n",
" Is.an.expert | \n",
" Graded.in.a.timely.manner | \n",
" Graded.Fairly | \n",
" Did.NOT.respond.to.email.promptly | \n",
" Knowledgable.of.course.content | \n",
" Helpful.feedback.vias.Canvas.discussion | \n",
" Consistently.fulfilled.responsibilities | \n",
" Considerate.in.communication | \n",
" Treated.me.with.respect | \n",
" Enthusiastic | \n",
" Professional | \n",
" TA.again | \n",
"
\n",
" \n",
" \n",
" \n",
" | 4 | \n",
" 5 | \n",
" Emily | \n",
" 4.0 | \n",
" 4.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 2.0 | \n",
" -4.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
" 5.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 5 | \n",
" 6 | \n",
" Emily | \n",
" 3.0 | \n",
" 4.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" -3.0 | \n",
" 4.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" | 6 | \n",
" 7 | \n",
" Emily | \n",
" 4.0 | \n",
" 5.0 | \n",
" 4.0 | \n",
" 5.0 | \n",
" 3.0 | \n",
" -5.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
" 5.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 10 | \n",
" 11 | \n",
" Emily | \n",
" 3.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
" 3.0 | \n",
" 2.0 | \n",
" -3.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 3.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" | 11 | \n",
" 12 | \n",
" Emily | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" -5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID TA Facilitated.learning Provided.helpful.feedback Is.an.expert \\\n",
"4 5 Emily 4.0 4.0 3.0 \n",
"5 6 Emily 3.0 4.0 3.0 \n",
"6 7 Emily 4.0 5.0 4.0 \n",
"10 11 Emily 3.0 3.0 4.0 \n",
"11 12 Emily 5.0 5.0 5.0 \n",
"\n",
" Graded.in.a.timely.manner Graded.Fairly \\\n",
"4 4.0 2.0 \n",
"5 4.0 4.0 \n",
"6 5.0 3.0 \n",
"10 3.0 2.0 \n",
"11 5.0 5.0 \n",
"\n",
" Did.NOT.respond.to.email.promptly Knowledgable.of.course.content \\\n",
"4 -4.0 3.0 \n",
"5 -3.0 4.0 \n",
"6 -5.0 4.0 \n",
"10 -3.0 3.0 \n",
"11 -5.0 5.0 \n",
"\n",
" Helpful.feedback.vias.Canvas.discussion \\\n",
"4 3.0 \n",
"5 3.0 \n",
"6 4.0 \n",
"10 3.0 \n",
"11 5.0 \n",
"\n",
" Consistently.fulfilled.responsibilities Considerate.in.communication \\\n",
"4 5.0 3.0 \n",
"5 4.0 3.0 \n",
"6 4.0 4.0 \n",
"10 5.0 5.0 \n",
"11 5.0 5.0 \n",
"\n",
" Treated.me.with.respect Enthusiastic Professional TA.again \n",
"4 3.0 3.0 4.0 3.0 \n",
"5 3.0 3.0 3.0 4.0 \n",
"6 4.0 4.0 5.0 3.0 \n",
"10 5.0 5.0 3.0 1.0 \n",
"11 5.0 5.0 5.0 5.0 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mask = (scores['TA'] == 'Jesse')\n",
"jesse = scores.loc[mask].copy()\n",
"emily = scores.loc[~mask].copy()\n",
"emily.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ID Facilitated.learning Provided.helpful.feedback \\\n",
"count 66.000000 55.000000 55.000000 \n",
"mean 48.878788 4.018182 4.200000 \n",
"std 35.912507 1.146507 1.128749 \n",
"min 0.000000 1.000000 1.000000 \n",
"25% 13.750000 3.500000 4.000000 \n",
"50% 53.000000 4.000000 5.000000 \n",
"75% 76.250000 5.000000 5.000000 \n",
"max 111.000000 5.000000 5.000000 \n",
"\n",
" Is.an.expert Graded.in.a.timely.manner Graded.Fairly \\\n",
"count 55.000000 55.000000 55.000000 \n",
"mean 4.018182 4.309091 3.981818 \n",
"std 1.113734 0.920401 1.146507 \n",
"min 1.000000 1.000000 1.000000 \n",
"25% 3.500000 4.000000 3.500000 \n",
"50% 4.000000 5.000000 4.000000 \n",
"75% 5.000000 5.000000 5.000000 \n",
"max 5.000000 5.000000 5.000000 \n",
"\n",
" Did.NOT.respond.to.email.promptly Knowledgable.of.course.content \\\n",
"count 55.000000 55.000000 \n",
"mean -3.909091 4.400000 \n",
"std 1.251262 0.852013 \n",
"min -5.000000 1.000000 \n",
"25% -5.000000 4.000000 \n",
"50% -4.000000 5.000000 \n",
"75% -3.000000 5.000000 \n",
"max -1.000000 5.000000 \n",
"\n",
" Helpful.feedback.vias.Canvas.discussion \\\n",
"count 55.000000 \n",
"mean 4.000000 \n",
"std 1.122167 \n",
"min 1.000000 \n",
"25% 3.000000 \n",
"50% 4.000000 \n",
"75% 5.000000 \n",
"max 5.000000 \n",
"\n",
" Consistently.fulfilled.responsibilities Considerate.in.communication \\\n",
"count 55.000000 55.000000 \n",
"mean 4.600000 4.509091 \n",
"std 0.735351 0.857920 \n",
"min 2.000000 1.000000 \n",
"25% 4.000000 4.000000 \n",
"50% 5.000000 5.000000 \n",
"75% 5.000000 5.000000 \n",
"max 5.000000 5.000000 \n",
"\n",
" Treated.me.with.respect Enthusiastic Professional TA.again \n",
"count 55.000000 55.000000 55.000000 55.000000 \n",
"mean 4.563636 4.309091 4.600000 3.800000 \n",
"std 0.787956 1.034099 0.807373 1.176939 \n",
"min 2.000000 1.000000 2.000000 1.000000 \n",
"25% 4.000000 4.000000 5.000000 3.000000 \n",
"50% 5.000000 5.000000 5.000000 4.000000 \n",
"75% 5.000000 5.000000 5.000000 5.000000 \n",
"max 5.000000 5.000000 5.000000 5.000000 ID Facilitated.learning Provided.helpful.feedback \\\n",
"count 70.000000 60.000000 60.000000 \n",
"mean 49.200000 3.966667 4.266667 \n",
"std 38.566938 0.882344 1.006195 \n",
"min 0.000000 1.000000 1.000000 \n",
"25% 14.250000 3.750000 4.000000 \n",
"50% 44.500000 4.000000 5.000000 \n",
"75% 83.750000 5.000000 5.000000 \n",
"max 115.000000 5.000000 5.000000 \n",
"\n",
" Is.an.expert Graded.in.a.timely.manner Graded.Fairly \\\n",
"count 60.000000 60.000000 60.000000 \n",
"mean 4.000000 4.483333 4.066667 \n",
"std 0.802538 0.700887 1.087162 \n",
"min 2.000000 2.000000 1.000000 \n",
"25% 3.000000 4.000000 4.000000 \n",
"50% 4.000000 5.000000 4.000000 \n",
"75% 5.000000 5.000000 5.000000 \n",
"max 5.000000 5.000000 5.000000 \n",
"\n",
" Did.NOT.respond.to.email.promptly Knowledgable.of.course.content \\\n",
"count 60.000000 60.000000 \n",
"mean -4.066667 4.366667 \n",
"std 1.205449 0.735692 \n",
"min -5.000000 3.000000 \n",
"25% -5.000000 4.000000 \n",
"50% -5.000000 5.000000 \n",
"75% -3.000000 5.000000 \n",
"max -1.000000 5.000000 \n",
"\n",
" Helpful.feedback.vias.Canvas.discussion \\\n",
"count 60.000000 \n",
"mean 4.133333 \n",
"std 0.947193 \n",
"min 2.000000 \n",
"25% 3.750000 \n",
"50% 4.000000 \n",
"75% 5.000000 \n",
"max 5.000000 \n",
"\n",
" Consistently.fulfilled.responsibilities Considerate.in.communication \\\n",
"count 60.000000 60.000000 \n",
"mean 4.533333 4.533333 \n",
"std 0.724081 0.700282 \n",
"min 2.000000 2.000000 \n",
"25% 4.000000 4.000000 \n",
"50% 5.000000 5.000000 \n",
"75% 5.000000 5.000000 \n",
"max 5.000000 5.000000 \n",
"\n",
" Treated.me.with.respect Enthusiastic Professional TA.again \n",
"count 60.000000 60.000000 60.000000 60.000000 \n",
"mean 4.666667 4.183333 4.750000 3.950000 \n",
"std 0.680644 0.892372 0.571202 1.048405 \n",
"min 2.000000 2.000000 3.000000 1.000000 \n",
"25% 5.000000 3.000000 5.000000 3.000000 \n",
"50% 5.000000 4.000000 5.000000 4.000000 \n",
"75% 5.000000 5.000000 5.000000 5.000000 \n",
"max 5.000000 5.000000 5.000000 5.000000 \n"
]
}
],
"source": [
"# there were 11 nonresponders for \"Emily\" and 10 for \"Jesse\"\n",
"emily_missing = {'ID':0, 'TA':'Emily' }\n",
"jesse_missing = {'ID':0, 'TA':'Jesse' }\n",
"for q in qs:\n",
" emily_missing[q] = np.nan\n",
" jesse_missing[q] = np.nan\n",
"for i in range(11):\n",
" emily = emily.append(emily_missing, ignore_index=True)\n",
"for i in range(10):\n",
" jesse = jesse.append(jesse_missing, ignore_index=True)\n",
"print(emily.describe(), jesse.describe())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from scipy.stats import norm, rankdata\n",
"from cryptorandom.sample import random_sample\n",
"from cryptorandom.cryptorandom import SHA256\n",
"from permute.utils import get_prng, permute\n",
"prng = SHA256(1234567890)\n",
"\n",
"def abs_mean_diff(x, y):\n",
" return np.abs(np.nanmean(x)-np.nanmean(y))\n",
"\n",
"def mean_diff(x, y):\n",
" return np.nanmean(x)-np.nanmean(y)\n",
"\n",
"def sim_npc(X, Y, cols, test_fn, combine=\"fisher\", prng=None, reps=int(10**4), verbose=False):\n",
" ts = {}\n",
" tv = {}\n",
" ps = {}\n",
" XY = pd.concat([X,Y])\n",
" nx = len(X[cols[0]])\n",
" n = len(XY[cols[0]])\n",
" all_i = set(range(n))\n",
" for c in cols:\n",
" ts[c] = test_fn(X[c], Y[c])\n",
" tv[c] = []\n",
" if verbose:\n",
" print('\\nn: {} nx: {} ts:{}\\n'.format(n, nx, ts))\n",
" for i in range(reps):\n",
" inx = random_sample(n, size=nx, replace=False, prng=prng) \n",
" iny = list(all_i - set(inx)) \n",
" for c in cols:\n",
" tv[c].append(test_fn(XY.iloc[inx][c], XY.iloc[iny][c]))\n",
" if verbose and i%(int(reps/10)) == 0:\n",
" print(i, [(np.sum(np.array(tv[c]) >= ts[c]) + 1)/(i+2) for c in cols]) \n",
" for c in cols:\n",
" ps[c] = (np.sum(np.array(tv[c]) >= ts[c]) + 1)/(reps+1) \n",
" dist = np.array([tv[c] for c in cols]).T\n",
" dist = np.append(dist, np.array([ts[c] for c in cols], ndmin=2), axis=0)\n",
" p = npc(np.array([ps[c] for c in cols]), dist, combine=combine)\n",
" return p, ts, ps\n",
"\n",
"def npc(pvalues, distr, combine=\"fisher\", plus1=True):\n",
" r\"\"\"\n",
" Combines p-values from individual partial test hypotheses $H_{0i}$ against\n",
" $H_{1i}$, $i=1,\\dots,n$ to test the global null hypothesis\n",
" .. math:: \\cap_{i=1}^n H_{0i}\n",
" against the alternative\n",
" .. math:: \\cup_{i=1}^n H_{1i}\n",
" using an omnibus test statistic.\n",
" Parameters\n",
" ----------\n",
" pvalues : array_like\n",
" Array of p-values to combine\n",
" distr : array_like\n",
" Array of dimension [B, n] where B is the number of permutations and n is\n",
" the number of partial hypothesis tests. The $i$th column of distr contains\n",
" the simulated null distribution of the $i$th test statistic under $H_{0i}$.\n",
" combine : {'fisher', 'liptak', 'tippett'} or function\n",
" The combining function to use. Default is \"fisher\".\n",
" Valid combining functions must take in p-values as their argument and be\n",
" monotonically decreasing in each p-value.\n",
" plus1 : bool\n",
" flag for whether to add 1 to the numerator and denominator of the\n",
" p-value based on the empirical permutation distribution. \n",
" Default is True.\n",
" Returns\n",
" -------\n",
" float\n",
" A single p-value for the global test\n",
" \"\"\"\n",
" n = len(pvalues)\n",
" B = distr.shape[0]\n",
" if n < 2:\n",
" raise ValueError(\"One p-value: nothing to combine!\")\n",
" if n != distr.shape[1]:\n",
" raise ValueError(\"Mismatch in number of p-values and size of distr\")\n",
"\n",
" combine_library = {\n",
" \"fisher\": fisher,\n",
" \"liptak\": liptak,\n",
" \"tippett\": tippett\n",
" }\n",
" if callable(combine):\n",
" if not check_combfunc_monotonic(pvalues, combine):\n",
" raise ValueError(\n",
" \"Bad combining function: must be monotonically decreasing in each p-value\")\n",
" combine_func = combine\n",
" else:\n",
" combine_func = combine_library[combine]\n",
"\n",
" # Convert test statistic distribution to p-values\n",
" combined_stat_distr = [0] * B\n",
" pvalues_from_distr = np.zeros((B, n))\n",
" for j in range(n):\n",
" pvalues_from_distr[:, j] = 1 - rankdata(distr[:, j], method=\"min\")/(plus1+B) + (1 + plus1)/(plus1+B)\n",
" if combine == \"liptak\":\n",
" toobig = np.where(pvalues_from_distr >= 1)\n",
" pvalues_from_distr[toobig] = 1 - np.finfo(float).eps\n",
" combined_stat_distr = np.apply_along_axis(\n",
" combine_func, 1, pvalues_from_distr)\n",
"\n",
" observed_combined_stat = combine_func(pvalues)\n",
" return (plus1 + np.sum(combined_stat_distr >= observed_combined_stat)) / (plus1+B)\n",
"\n",
"def fisher(pvalues):\n",
" r\"\"\"\n",
" Apply Fisher's combining function\n",
" .. math:: -2 \\sum_i \\log(p_i)\n",
" Parameters\n",
" ----------\n",
" pvalues : array_like\n",
" Array of p-values to combine\n",
" Returns\n",
" -------\n",
" float\n",
" Fisher's combined test statistic\n",
" \"\"\"\n",
" return -2*np.log(np.prod(pvalues))\n",
"\n",
"def liptak(pvalues):\n",
" r\"\"\"\n",
" Apply Liptak's combining function\n",
" .. math:: \\sum_i \\Phi^{-1}(1-p_i)\n",
" where $\\Phi^{-1}$ is the inverse CDF of the standard normal distribution.\n",
" Parameters\n",
" ----------\n",
" pvalues : array_like\n",
" Array of p-values to combine\n",
" Returns\n",
" -------\n",
" float\n",
" Liptak's combined test statistic\n",
" \"\"\"\n",
" return np.sum(norm.ppf(1 - pvalues))\n",
"\n",
"\n",
"def tippett(pvalues):\n",
" r\"\"\"\n",
" Apply Tippett's combining function\n",
" .. math:: \\max_i \\{1-p_i\\}\n",
" Parameters\n",
" ----------\n",
" pvalues : array_like\n",
" Array of p-values to combine\n",
" Returns\n",
" -------\n",
" float\n",
" Tippett's combined test statistic\n",
" \"\"\"\n",
" return np.max(1 - pvalues)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"n: 136 nx: 70 ts:{'Facilitated.learning': 0.051515151515151736, 'Provided.helpful.feedback': 0.06666666666666643, 'Is.an.expert': 0.01818181818181852, 'Graded.in.a.timely.manner': 0.17424242424242387, 'Graded.Fairly': 0.0848484848484845, 'Did.NOT.respond.to.email.promptly': 0.15757575757575726, 'Knowledgable.of.course.content': 0.0333333333333341, 'Helpful.feedback.vias.Canvas.discussion': 0.13333333333333375, 'Consistently.fulfilled.responsibilities': 0.06666666666666643, 'Considerate.in.communication': 0.0242424242424244, 'Treated.me.with.respect': 0.10303030303030347, 'Enthusiastic': 0.12575757575757596, 'Professional': 0.15000000000000036, 'TA.again': 0.15000000000000036}\n",
"\n",
"0 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 1.0, 0.5, 0.5, 0.5, 1.0]\n",
"1000 [0.8303393213572854, 0.7255489021956087, 0.8892215568862275, 0.27245508982035926, 0.7125748502994012, 0.4540918163672655, 0.8293413173652695, 0.49101796407185627, 0.6307385229540918, 0.8982035928143712, 0.4600798403193613, 0.49001996007984033, 0.27345309381237526, 0.47305389221556887]\n",
"2000 [0.8361638361638362, 0.7372627372627373, 0.8901098901098901, 0.26823176823176825, 0.7207792207792207, 0.4515484515484515, 0.8416583416583416, 0.5154845154845155, 0.6213786213786214, 0.8931068931068931, 0.476023976023976, 0.494005994005994, 0.27672327672327673, 0.4825174825174825]\n",
"3000 [0.8397734843437709, 0.7368421052631579, 0.8884077281812125, 0.2731512325116589, 0.7301798800799467, 0.4563624250499667, 0.8394403730846103, 0.5163224516988674, 0.6332445036642238, 0.896402398401066, 0.4703530979347102, 0.5036642238507661, 0.2741505662891406, 0.4793471019320453]\n",
"4000 [0.8320839580209896, 0.7348825587206397, 0.8858070964517741, 0.276111944027986, 0.7311344327836082, 0.463768115942029, 0.8340829585207397, 0.515992003998001, 0.6326836581709145, 0.8958020989505248, 0.46676661669165415, 0.5002498750624688, 0.26936531734132935, 0.47976011994003]\n",
"5000 [0.8310675729708117, 0.7313074770091963, 0.8816473410635746, 0.2720911635345862, 0.727109156337465, 0.469812075169932, 0.8348660535785686, 0.5179928028788484, 0.632546981207517, 0.8968412634946021, 0.468812475009996, 0.49780087964814074, 0.2694922031187525, 0.476609356257497]\n",
"6000 [0.8328890369876708, 0.73458847050983, 0.8815394868377208, 0.273408863712096, 0.7240919693435521, 0.47184271909363545, 0.8333888703765412, 0.5178273908697101, 0.6344551816061312, 0.8970343218927024, 0.46984338553815397, 0.5009996667777408, 0.26824391869376873, 0.475674775074975]\n",
"7000 [0.8341902313624678, 0.7356469580119965, 0.8797486432447872, 0.268494715795487, 0.7233647529277349, 0.4721508140531277, 0.8314767209368752, 0.5187089403027706, 0.6359611539560126, 0.8968866038274779, 0.4705798343330477, 0.5008568980291346, 0.2707797772065124, 0.4777206512425021]\n",
"8000 [0.8359160209947513, 0.7345663584103974, 0.8804048987753061, 0.2693076730817296, 0.725568607848038, 0.47100724818795303, 0.83104223944014, 0.5168707823044238, 0.6369657585603599, 0.8979005248687828, 0.46838290427393153, 0.5029992501874532, 0.26905773556610846, 0.4783804048987753]\n",
"9000 [0.8373694734503444, 0.7347256165296601, 0.8813596978449233, 0.2684958898022662, 0.7237280604310153, 0.4723394801155299, 0.8317040657631637, 0.5125527660519884, 0.6385247722728282, 0.8965785381026439, 0.4670073317040658, 0.503332592757165, 0.2707176183070429, 0.47833814707842703]\n"
]
},
{
"data": {
"text/plain": [
"(0.9065186962607479,\n",
" {'Facilitated.learning': 0.051515151515151736,\n",
" 'Provided.helpful.feedback': 0.06666666666666643,\n",
" 'Is.an.expert': 0.01818181818181852,\n",
" 'Graded.in.a.timely.manner': 0.17424242424242387,\n",
" 'Graded.Fairly': 0.0848484848484845,\n",
" 'Did.NOT.respond.to.email.promptly': 0.15757575757575726,\n",
" 'Knowledgable.of.course.content': 0.0333333333333341,\n",
" 'Helpful.feedback.vias.Canvas.discussion': 0.13333333333333375,\n",
" 'Consistently.fulfilled.responsibilities': 0.06666666666666643,\n",
" 'Considerate.in.communication': 0.0242424242424244,\n",
" 'Treated.me.with.respect': 0.10303030303030347,\n",
" 'Enthusiastic': 0.12575757575757596,\n",
" 'Professional': 0.15000000000000036,\n",
" 'TA.again': 0.15000000000000036},\n",
" {'Facilitated.learning': 0.835016498350165,\n",
" 'Provided.helpful.feedback': 0.7364263573642635,\n",
" 'Is.an.expert': 0.8813118688131187,\n",
" 'Graded.in.a.timely.manner': 0.2696730326967303,\n",
" 'Graded.Fairly': 0.7234276572342766,\n",
" 'Did.NOT.respond.to.email.promptly': 0.47305269473052697,\n",
" 'Knowledgable.of.course.content': 0.833916608339166,\n",
" 'Helpful.feedback.vias.Canvas.discussion': 0.5122487751224878,\n",
" 'Consistently.fulfilled.responsibilities': 0.6366363363663634,\n",
" 'Considerate.in.communication': 0.8963103689631037,\n",
" 'Treated.me.with.respect': 0.46935306469353066,\n",
" 'Enthusiastic': 0.5017498250174982,\n",
" 'Professional': 0.27017298270172985,\n",
" 'TA.again': 0.4781521847815218})"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sim_npc(jesse, emily, qs, abs_mean_diff, combine=\"fisher\", prng=prng, \\\n",
" reps=int(10**4), verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"n: 136 nx: 66 ts:{'Facilitated.learning': 0.051515151515151736, 'Provided.helpful.feedback': -0.06666666666666643, 'Is.an.expert': 0.01818181818181852, 'Graded.in.a.timely.manner': -0.17424242424242387, 'Graded.Fairly': -0.0848484848484845, 'Did.NOT.respond.to.email.promptly': 0.15757575757575726, 'Knowledgable.of.course.content': 0.0333333333333341, 'Helpful.feedback.vias.Canvas.discussion': -0.13333333333333375, 'Consistently.fulfilled.responsibilities': 0.06666666666666643, 'Considerate.in.communication': -0.0242424242424244, 'Treated.me.with.respect': -0.10303030303030347, 'Enthusiastic': 0.12575757575757596, 'Professional': -0.15000000000000036, 'TA.again': -0.15000000000000036}\n",
"\n",
"0 [0.5, 1.0, 0.5, 1.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1.0, 0.5, 1.0, 0.5]\n",
"1000 [0.43812375249500995, 0.6526946107784432, 0.45209580838323354, 0.8792415169660679, 0.6736526946107785, 0.23552894211576847, 0.43313373253493015, 0.7644710578842315, 0.312375249500998, 0.5898203592814372, 0.811377245508982, 0.24251497005988024, 0.9001996007984032, 0.7734530938123753]\n",
"2000 [0.4160839160839161, 0.6503496503496503, 0.45204795204795206, 0.8776223776223776, 0.6618381618381618, 0.24225774225774227, 0.42507492507492506, 0.7592407592407593, 0.31218781218781216, 0.583916083916084, 0.7947052947052947, 0.24675324675324675, 0.8881118881118881, 0.7672327672327672]\n",
"3000 [0.4100599600266489, 0.6465689540306462, 0.4473684210526316, 0.8704197201865423, 0.6675549633577615, 0.24550299800133243, 0.4177215189873418, 0.7551632245169887, 0.3117921385742838, 0.570286475682878, 0.7924716855429713, 0.24616922051965356, 0.8824117255163224, 0.7674883411059293]\n",
"4000 [0.41179410294852575, 0.6476761619190404, 0.4535232383808096, 0.875312343828086, 0.6619190404797601, 0.24387806096951525, 0.4222888555722139, 0.7616191904047976, 0.31284357821089454, 0.5719640179910045, 0.7913543228385808, 0.24587706146926536, 0.8820589705147426, 0.7698650674662668]\n",
"5000 [0.41203518592562977, 0.647141143542583, 0.45621751299480207, 0.8744502199120352, 0.6603358656537385, 0.23930427828868453, 0.4226309476209516, 0.7626949220311875, 0.3170731707317073, 0.5727708916433427, 0.789484206317473, 0.24710115953618553, 0.8798480607756898, 0.7704918032786885]\n",
"6000 [0.41036321226257916, 0.6429523492169277, 0.4501832722425858, 0.8755414861712763, 0.6531156281239587, 0.2465844718427191, 0.41836054648450516, 0.7582472509163612, 0.31356214595134957, 0.5659780073308897, 0.7845718093968677, 0.2437520826391203, 0.8787070976341219, 0.7665778073975341]\n",
"7000 [0.4124535846900885, 0.6452442159383034, 0.45215652670665524, 0.8774635818337618, 0.6552413596115395, 0.24478720365609827, 0.4181662382176521, 0.7587832047986289, 0.31348186232505, 0.572407883461868, 0.7863467580691231, 0.2493573264781491, 0.8803199085975436, 0.7710654098828906]\n",
"8000 [0.4157710572356911, 0.6464633841539615, 0.45601099725068733, 0.8806548362909272, 0.6567108222944263, 0.2459385153711572, 0.4202699325168708, 0.7621844538865283, 0.3189202699325169, 0.5738565358660335, 0.7866783304173957, 0.252061984503874, 0.8782804298925269, 0.7736815796050988]\n",
"9000 [0.4177960453232615, 0.6467451677405021, 0.4578982448344812, 0.8803599200177739, 0.6587425016662963, 0.24894467896023106, 0.42223950233281493, 0.762497222839369, 0.32015107753832484, 0.5735392135081093, 0.7862697178404799, 0.25261053099311265, 0.8773605865363253, 0.7739391246389691]\n"
]
},
{
"data": {
"text/plain": [
"(0.7869426114777045,\n",
" {'Facilitated.learning': 0.051515151515151736,\n",
" 'Provided.helpful.feedback': -0.06666666666666643,\n",
" 'Is.an.expert': 0.01818181818181852,\n",
" 'Graded.in.a.timely.manner': -0.17424242424242387,\n",
" 'Graded.Fairly': -0.0848484848484845,\n",
" 'Did.NOT.respond.to.email.promptly': 0.15757575757575726,\n",
" 'Knowledgable.of.course.content': 0.0333333333333341,\n",
" 'Helpful.feedback.vias.Canvas.discussion': -0.13333333333333375,\n",
" 'Consistently.fulfilled.responsibilities': 0.06666666666666643,\n",
" 'Considerate.in.communication': -0.0242424242424244,\n",
" 'Treated.me.with.respect': -0.10303030303030347,\n",
" 'Enthusiastic': 0.12575757575757596,\n",
" 'Professional': -0.15000000000000036,\n",
" 'TA.again': -0.15000000000000036},\n",
" {'Facilitated.learning': 0.41985801419858015,\n",
" 'Provided.helpful.feedback': 0.6459354064593541,\n",
" 'Is.an.expert': 0.45835416458354167,\n",
" 'Graded.in.a.timely.manner': 0.8809119088091191,\n",
" 'Graded.Fairly': 0.6594340565943405,\n",
" 'Did.NOT.respond.to.email.promptly': 0.24987501249875013,\n",
" 'Knowledgable.of.course.content': 0.42455754424557546,\n",
" 'Helpful.feedback.vias.Canvas.discussion': 0.7619238076192381,\n",
" 'Consistently.fulfilled.responsibilities': 0.32086791320867913,\n",
" 'Considerate.in.communication': 0.5740425957404259,\n",
" 'Treated.me.with.respect': 0.7851214878512148,\n",
" 'Enthusiastic': 0.2521747825217478,\n",
" 'Professional': 0.8780121987801219,\n",
" 'TA.again': 0.7752224777522247})"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sim_npc(jesse, emily, qs, mean_diff, combine=\"tippett\", prng=prng, \\\n",
" reps=int(10**4), verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.203092328742278e-05"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from scipy.stats import chi2\n",
"chi2.sf(-2*np.log(0.08*1.0e-5),df=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}