{
 "metadata": {
  "name": "",
  "signature": "sha256:eb7a9be5b3850ec359d64bdb17442bfd4d219c117e3ea07475acc3e8362ba4c6"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import glob\n",
      "import json\n",
      "\n",
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 89
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Folder holding the Mechanical Turk result CSVs. Keep the trailing slash:\n",
      "# file paths are built from it by plain string concatenation.\n",
      "turkfolder = 'helpers/aggregation_maps/mechanical_turk/'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 39
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# A bare `results*` listing would also pick up the `.disagreements.csv` files\n",
      "# that make_disagreement_files writes next to its inputs, so filter those out\n",
      "# to keep this notebook re-runnable.\n",
      "results_files = sorted(\n",
      "    path for path in glob.glob(turkfolder + 'results*.csv')\n",
      "    if not path.endswith('.disagreements.csv')\n",
      ")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def make_disagreement_files(filename):\n",
      "    \"\"\"Write the rows of a results CSV on which the answers disagree.\n",
      "\n",
      "    Reads `filename`, keeps the rows whose 'Agreement' column equals 'No',\n",
      "    prints how many there are (as a count and as a share of all rows) and\n",
      "    writes their qid, en_label, Answer1, Answer2 and Answer columns, without\n",
      "    the index, to `filename + '.disagreements.csv'`.\n",
      "    \"\"\"\n",
      "    print(filename)\n",
      "    # DataFrame.from_csv is deprecated (removed in pandas 1.0); like it, treat\n",
      "    # the first column as the index.\n",
      "    adf = pd.read_csv(filename, index_col=0)\n",
      "    disagreements = adf[adf['Agreement'] == 'No']\n",
      "    print('%d disagreements' % len(disagreements))\n",
      "    if len(adf):  # an empty results file would otherwise divide by zero\n",
      "        print('%.1f%% of all rows' % (100.0 * len(disagreements) / len(adf)))\n",
      "    columns = ['qid', 'en_label', 'Answer1', 'Answer2', 'Answer']\n",
      "    disagreements[columns].to_csv(filename + '.disagreements.csv', index=False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 45
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for results_file in results_files:\n",
      "    make_disagreement_files(results_file)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "helpers/aggregation_maps/for mechanical turk//results_citizenships.csv\n",
        "229 disgareements\n",
        "0.3357771261 as a percentage\n",
        "helpers/aggregation_maps/for mechanical turk//results_ethnic_groups.csv\n",
        "284 disgareements\n",
        "0.387978142077 as a percentage\n"
       ]
      }
     ],
     "prompt_number": 46
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "country_map = pd.read_csv('helpers/aggregation_maps/country_maps.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 8
    },
{ "cell_type": "code", "collapsed": false, "input": [ "dq = disagreements['qid'].apply(lambda x: x.split('http://wikidata.org/wiki/')[1])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 31 }, { "cell_type": "code", "collapsed": false, "input": [ "for d in dq:\n", " if d in list(country_map['qid']):\n", " print d" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Q191\n", "Q33\n", "Q224\n", "Q262\n", "Q1183\n", "Q37\n", "Q902\n" ] } ], "prompt_number": 34 }, { "cell_type": "code", "collapsed": false, "input": [ "def make_cutlure_map(param):\n", " agreements_path = turkfolder+'results_%s.csv'%param\n", " disagreements_path = turkfolder+'results_%s.csv.disagreements.csv'%param\n", " \n", " agree = ethnic_df[ethnic_df['Agreement']=='Yes']['Answer'].to_dict()\n", " disagree = ethnic_disagreements_df['Answer'].to_dict()\n", " cultures_map = dict(agree.items() + disagree.items())\n", " qid_map = {url.split('http://wikidata.org/wiki/')[1] : culture for url, culture in cultures_map.iteritems()} \n", " json.dump(qid_map, open(turkfolder+'%s_map.json'%param,'w'))\n", " \n", "for param in ['ethnic_groups','citizenship']:\n", " make_cutlure_map(param)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 96 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }