{
"metadata": {
"name": "",
"signature": "sha256:e5ba3c856cfd5375cef1d2d53af86aea926411dd442ff963016667bd1dff4353"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas\n",
"import scipy.stats\n",
"import json"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pywikibot\n",
"#Tranforming QIDs into English labels.\n",
"enwp = pywikibot.Site('en','wikipedia')\n",
"wikidata = enwp.data_repository()\n",
"\n",
"retrieved = dict()\n",
"\n",
"def english_label(qid):\n",
" if type(qid) is float:\n",
" if math.isnan(qid):\n",
" return None\n",
" #first see if we've done it\n",
" try:\n",
" return retrieved[qid]\n",
" except KeyError:\n",
" try:\n",
" page = pywikibot.ItemPage(wikidata, qid)\n",
" data = page.get()\n",
" lab = data['labels']['en']\n",
" retrieved[qid] = lab\n",
" return lab\n",
" except KeyError:\n",
" retrieved[qid] = qid\n",
" return qid"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"VERBOSE:pywiki:Starting 1 threads...\n"
]
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# The data used to be loaded this way, but no longer is:\n",
"# gen_cult = pandas.read_csv('helpers/Chi_Squared_Test_Data.csv')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"allrecs = pandas.DataFrame.from_dict(json.load(open('helpers/world_cultures_shortcut.json','r')))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obs = pandas.crosstab(allrecs['culture'], allrecs['gender'])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obs.columns = map(english_label, obs.columns)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"VERBOSE:pywiki:Found 1 wikidata:wikidata processes running, including this one.\n"
]
}
],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obs"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" transgender female | \n",
" intersex | \n",
" fa'afafine | \n",
" transgender male | \n",
" male animal | \n",
" woman | \n",
" genderqueer | \n",
" female | \n",
" male | \n",
" kathoey | \n",
"
\n",
" \n",
" culture | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" africa | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 2256 | \n",
" 13915 | \n",
" 0 | \n",
"
\n",
" \n",
" catholic european | \n",
" 7 | \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
" 4 | \n",
" 0 | \n",
" 0 | \n",
" 38267 | \n",
" 262253 | \n",
" 0 | \n",
"
\n",
" \n",
" confucian | \n",
" 8 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 5618 | \n",
" 14534 | \n",
" 0 | \n",
"
\n",
" \n",
" english-speaking | \n",
" 38 | \n",
" 7 | \n",
" 1 | \n",
" 9 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 60753 | \n",
" 223374 | \n",
" 0 | \n",
"
\n",
" \n",
" islamic | \n",
" 3 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 4119 | \n",
" 22693 | \n",
" 0 | \n",
"
\n",
" \n",
" latin america | \n",
" 7 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 11555 | \n",
" 64539 | \n",
" 0 | \n",
"
\n",
" \n",
" orthodox | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 14178 | \n",
" 81513 | \n",
" 0 | \n",
"
\n",
" \n",
" protestant european | \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 49801 | \n",
" 244301 | \n",
" 0 | \n",
"
\n",
" \n",
" south asia | \n",
" 5 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 5531 | \n",
" 23594 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 24,
"text": [
" transgender female intersex fa'afafine \\\n",
"culture \n",
"africa 0 0 0 \n",
"catholic european 7 2 0 \n",
"confucian 8 0 0 \n",
"english-speaking 38 7 1 \n",
"islamic 3 0 0 \n",
"latin america 7 0 0 \n",
"orthodox 1 0 0 \n",
"protestant european 3 1 0 \n",
"south asia 5 0 0 \n",
"\n",
" transgender male male animal woman genderqueer \\\n",
"culture \n",
"africa 0 0 0 0 \n",
"catholic european 1 4 0 0 \n",
"confucian 0 0 0 0 \n",
"english-speaking 9 0 0 4 \n",
"islamic 0 0 0 0 \n",
"latin america 0 0 0 0 \n",
"orthodox 0 1 1 0 \n",
"protestant european 2 0 0 0 \n",
"south asia 0 0 0 0 \n",
"\n",
" female male kathoey \n",
"culture \n",
"africa 2256 13915 0 \n",
"catholic european 38267 262253 0 \n",
"confucian 5618 14534 0 \n",
"english-speaking 60753 223374 0 \n",
"islamic 4119 22693 0 \n",
"latin america 11555 64539 0 \n",
"orthodox 14178 81513 0 \n",
"protestant european 49801 244301 0 \n",
"south asia 5531 23594 1 "
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"chi2, p, dof, expected = scipy.stats.chi2_contingency(obs)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"chi2, p, dof"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 30,
"text": [
"(10430.455963736977, 0.0, 72)"
]
}
],
"prompt_number": 30
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pandas.DataFrame(expected)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.018735 | \n",
" 0.141491 | \n",
" 0.014149 | \n",
" 0.169789 | \n",
" 0.070745 | \n",
" 0.014149 | \n",
" 0.056596 | \n",
" 2717.729756 | \n",
" 13451.770440 | \n",
" 0.014149 | \n",
"
\n",
" \n",
" 1 | \n",
" 18.932932 | \n",
" 2.629574 | \n",
" 0.262957 | \n",
" 3.155489 | \n",
" 1.314787 | \n",
" 0.262957 | \n",
" 1.051830 | \n",
" 50508.329383 | \n",
" 249997.797134 | \n",
" 0.262957 | \n",
"
\n",
" \n",
" 2 | \n",
" 1.270032 | \n",
" 0.176393 | \n",
" 0.017639 | \n",
" 0.211672 | \n",
" 0.088197 | \n",
" 0.017639 | \n",
" 0.070557 | \n",
" 3388.128865 | \n",
" 16770.001365 | \n",
" 0.017639 | \n",
"
\n",
" \n",
" 3 | \n",
" 17.903047 | \n",
" 2.486534 | \n",
" 0.248653 | \n",
" 2.983841 | \n",
" 1.243267 | \n",
" 0.248653 | \n",
" 0.994614 | \n",
" 47760.852663 | \n",
" 236398.790074 | \n",
" 0.248653 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.689282 | \n",
" 0.234622 | \n",
" 0.023462 | \n",
" 0.281547 | \n",
" 0.117311 | \n",
" 0.023462 | \n",
" 0.093849 | \n",
" 4506.581127 | \n",
" 22305.931875 | \n",
" 0.023462 | \n",
"
\n",
" \n",
" 5 | \n",
" 4.794183 | \n",
" 0.665859 | \n",
" 0.066586 | \n",
" 0.799031 | \n",
" 0.332929 | \n",
" 0.066586 | \n",
" 0.266344 | \n",
" 12789.682280 | \n",
" 63304.259617 | \n",
" 0.066586 | \n",
"
\n",
" \n",
" 6 | \n",
" 6.028496 | \n",
" 0.837291 | \n",
" 0.083729 | \n",
" 1.004749 | \n",
" 0.418646 | \n",
" 0.083729 | \n",
" 0.334916 | \n",
" 16082.520021 | \n",
" 79602.604693 | \n",
" 0.083729 | \n",
"
\n",
" \n",
" 7 | \n",
" 18.528109 | \n",
" 2.573348 | \n",
" 0.257335 | \n",
" 3.088018 | \n",
" 1.286674 | \n",
" 0.257335 | \n",
" 1.029339 | \n",
" 49428.363307 | \n",
" 244652.359199 | \n",
" 0.257335 | \n",
"
\n",
" \n",
" 8 | \n",
" 1.835184 | \n",
" 0.254887 | \n",
" 0.025489 | \n",
" 0.305864 | \n",
" 0.127443 | \n",
" 0.025489 | \n",
" 0.101955 | \n",
" 4895.812598 | \n",
" 24232.485603 | \n",
" 0.025489 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 28,
"text": [
" 0 1 2 3 4 5 6 \\\n",
"0 1.018735 0.141491 0.014149 0.169789 0.070745 0.014149 0.056596 \n",
"1 18.932932 2.629574 0.262957 3.155489 1.314787 0.262957 1.051830 \n",
"2 1.270032 0.176393 0.017639 0.211672 0.088197 0.017639 0.070557 \n",
"3 17.903047 2.486534 0.248653 2.983841 1.243267 0.248653 0.994614 \n",
"4 1.689282 0.234622 0.023462 0.281547 0.117311 0.023462 0.093849 \n",
"5 4.794183 0.665859 0.066586 0.799031 0.332929 0.066586 0.266344 \n",
"6 6.028496 0.837291 0.083729 1.004749 0.418646 0.083729 0.334916 \n",
"7 18.528109 2.573348 0.257335 3.088018 1.286674 0.257335 1.029339 \n",
"8 1.835184 0.254887 0.025489 0.305864 0.127443 0.025489 0.101955 \n",
"\n",
" 7 8 9 \n",
"0 2717.729756 13451.770440 0.014149 \n",
"1 50508.329383 249997.797134 0.262957 \n",
"2 3388.128865 16770.001365 0.017639 \n",
"3 47760.852663 236398.790074 0.248653 \n",
"4 4506.581127 22305.931875 0.023462 \n",
"5 12789.682280 63304.259617 0.066586 \n",
"6 16082.520021 79602.604693 0.083729 \n",
"7 49428.363307 244652.359199 0.257335 \n",
"8 4895.812598 24232.485603 0.025489 "
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"np.ndarr"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}