{ "metadata": { "name": "", "signature": "sha256:e5ba3c856cfd5375cef1d2d53af86aea926411dd442ff963016667bd1dff4353" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import pandas\n", "import scipy.stats\n", "import json" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "import pywikibot\n", "#Tranforming QIDs into English labels.\n", "enwp = pywikibot.Site('en','wikipedia')\n", "wikidata = enwp.data_repository()\n", "\n", "retrieved = dict()\n", "\n", "def english_label(qid):\n", " if type(qid) is float:\n", " if math.isnan(qid):\n", " return None\n", " #first see if we've done it\n", " try:\n", " return retrieved[qid]\n", " except KeyError:\n", " try:\n", " page = pywikibot.ItemPage(wikidata, qid)\n", " data = page.get()\n", " lab = data['labels']['en']\n", " retrieved[qid] = lab\n", " return lab\n", " except KeyError:\n", " retrieved[qid] = qid\n", " return qid" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "VERBOSE:pywiki:Starting 1 threads...\n" ] } ], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "# gen_cult = pandas.read_csv('helpers/Chi_Squared_Test_Data.csv') was doing it this way but no longer." ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "allrecs = pandas.DataFrame.from_dict(json.load(open('helpers/world_cultures_shortcut.json','r')))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "obs = pandas.crosstab(allrecs['culture'], allrecs['gender'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "obs.columns = map(english_label, obs.columns)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "VERBOSE:pywiki:Found 1 wikidata:wikidata processes running, including this one.\n" ] } ], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "obs" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
transgender femaleintersexfa'afafinetransgender malemale animalwomangenderqueerfemalemalekathoey
culture
africa 0 0 0 0 0 0 0 2256 13915 0
catholic european 7 2 0 1 4 0 0 38267 262253 0
confucian 8 0 0 0 0 0 0 5618 14534 0
english-speaking 38 7 1 9 0 0 4 60753 223374 0
islamic 3 0 0 0 0 0 0 4119 22693 0
latin america 7 0 0 0 0 0 0 11555 64539 0
orthodox 1 0 0 0 1 1 0 14178 81513 0
protestant european 3 1 0 2 0 0 0 49801 244301 0
south asia 5 0 0 0 0 0 0 5531 23594 1
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 24, "text": [ " transgender female intersex fa'afafine \\\n", "culture \n", "africa 0 0 0 \n", "catholic european 7 2 0 \n", "confucian 8 0 0 \n", "english-speaking 38 7 1 \n", "islamic 3 0 0 \n", "latin america 7 0 0 \n", "orthodox 1 0 0 \n", "protestant european 3 1 0 \n", "south asia 5 0 0 \n", "\n", " transgender male male animal woman genderqueer \\\n", "culture \n", "africa 0 0 0 0 \n", "catholic european 1 4 0 0 \n", "confucian 0 0 0 0 \n", "english-speaking 9 0 0 4 \n", "islamic 0 0 0 0 \n", "latin america 0 0 0 0 \n", "orthodox 0 1 1 0 \n", "protestant european 2 0 0 0 \n", "south asia 0 0 0 0 \n", "\n", " female male kathoey \n", "culture \n", "africa 2256 13915 0 \n", "catholic european 38267 262253 0 \n", "confucian 5618 14534 0 \n", "english-speaking 60753 223374 0 \n", "islamic 4119 22693 0 \n", "latin america 11555 64539 0 \n", "orthodox 14178 81513 0 \n", "protestant european 49801 244301 0 \n", "south asia 5531 23594 1 " ] } ], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "chi2, p, dof, expected = scipy.stats.chi2_contingency(obs)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "chi2, p, dof" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 30, "text": [ "(10430.455963736977, 0.0, 72)" ] } ], "prompt_number": 30 }, { "cell_type": "code", "collapsed": false, "input": [ "pandas.DataFrame(expected)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
0 1.018735 0.141491 0.014149 0.169789 0.070745 0.014149 0.056596 2717.729756 13451.770440 0.014149
1 18.932932 2.629574 0.262957 3.155489 1.314787 0.262957 1.051830 50508.329383 249997.797134 0.262957
2 1.270032 0.176393 0.017639 0.211672 0.088197 0.017639 0.070557 3388.128865 16770.001365 0.017639
3 17.903047 2.486534 0.248653 2.983841 1.243267 0.248653 0.994614 47760.852663 236398.790074 0.248653
4 1.689282 0.234622 0.023462 0.281547 0.117311 0.023462 0.093849 4506.581127 22305.931875 0.023462
5 4.794183 0.665859 0.066586 0.799031 0.332929 0.066586 0.266344 12789.682280 63304.259617 0.066586
6 6.028496 0.837291 0.083729 1.004749 0.418646 0.083729 0.334916 16082.520021 79602.604693 0.083729
7 18.528109 2.573348 0.257335 3.088018 1.286674 0.257335 1.029339 49428.363307 244652.359199 0.257335
8 1.835184 0.254887 0.025489 0.305864 0.127443 0.025489 0.101955 4895.812598 24232.485603 0.025489
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 28, "text": [ " 0 1 2 3 4 5 6 \\\n", "0 1.018735 0.141491 0.014149 0.169789 0.070745 0.014149 0.056596 \n", "1 18.932932 2.629574 0.262957 3.155489 1.314787 0.262957 1.051830 \n", "2 1.270032 0.176393 0.017639 0.211672 0.088197 0.017639 0.070557 \n", "3 17.903047 2.486534 0.248653 2.983841 1.243267 0.248653 0.994614 \n", "4 1.689282 0.234622 0.023462 0.281547 0.117311 0.023462 0.093849 \n", "5 4.794183 0.665859 0.066586 0.799031 0.332929 0.066586 0.266344 \n", "6 6.028496 0.837291 0.083729 1.004749 0.418646 0.083729 0.334916 \n", "7 18.528109 2.573348 0.257335 3.088018 1.286674 0.257335 1.029339 \n", "8 1.835184 0.254887 0.025489 0.305864 0.127443 0.025489 0.101955 \n", "\n", " 7 8 9 \n", "0 2717.729756 13451.770440 0.014149 \n", "1 50508.329383 249997.797134 0.262957 \n", "2 3388.128865 16770.001365 0.017639 \n", "3 47760.852663 236398.790074 0.248653 \n", "4 4506.581127 22305.931875 0.023462 \n", "5 12789.682280 63304.259617 0.066586 \n", "6 16082.520021 79602.604693 0.083729 \n", "7 49428.363307 244652.359199 0.257335 \n", "8 4895.812598 24232.485603 0.025489 " ] } ], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "np.ndarr" ], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }