{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from document_polluter import DocumentPolluter\n",
    "import yaml\n",
    "import os\n",
    "import requests\n",
    "import json\n",
    "from scipy import stats\n",
    "\n",
    "# The Azure endpoint and key are read from a local YAML file so they never appear in the notebook.\n",
    "# NOTE(security): yaml.load with FullLoader is acceptable for this local file;\n",
    "# switch to yaml.safe_load if the file could ever come from an untrusted source.\n",
    "with open('credentials.yaml') as file:\n",
    "    credentials = yaml.load(file, Loader=yaml.FullLoader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "20"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the source paragraphs and hand them to DocumentPolluter, which exposes\n",
    "# `eligible_documents` and, per group, `polluted_documents` (used in the cells below).\n",
    "with open('paragraphs/us_race.yaml') as file:\n",
    "    documents = yaml.load(file, Loader=yaml.FullLoader)\n",
    "\n",
    "dp = DocumentPolluter(documents=documents, genre='us-race')\n",
    "len(dp.eligible_documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = f\"{credentials['azure']['endpoint']}/text/analytics/v2.1/sentiment\"\n",
    "headers = {'content-type': 'application/json', 'Ocp-Apim-Subscription-Key': credentials['azure']['key']}\n",
    "\n",
    "# Maps each group name to its list of sentiment scores, in input-document order.\n",
    "sentiment = {}\n",
    "\n",
    "# The loop variable is `group_documents`, not `documents`, so the list loaded from\n",
    "# the YAML file in the earlier cell is not overwritten.\n",
    "for genre, group_documents in dp.polluted_documents.items():\n",
    "    # Ids are sent as strings and reused below to line scores up with inputs.\n",
    "    data = {'documents': [\n",
    "        {'language': 'en', 'id': str(idx), 'text': document}\n",
    "        for idx, document in enumerate(group_documents)\n",
    "    ]}\n",
    "\n",
    "    r = requests.post(url=url, data=json.dumps(data), headers=headers, timeout=30)\n",
    "    # Fail loudly on an HTTP error instead of hitting a confusing KeyError below.\n",
    "    r.raise_for_status()\n",
    "    response_body = r.json()\n",
    "\n",
    "    # Rejected documents are listed under 'errors' and left out of 'documents';\n",
    "    # a shorter score list would silently misalign the per-index comparison later.\n",
    "    if response_body.get('errors'):\n",
    "        raise RuntimeError(f\"Azure rejected documents for {genre!r}: {response_body['errors']}\")\n",
    "\n",
    "    # Key by id so scores follow input order regardless of response order.\n",
    "    score_by_id = {str(doc['id']): doc['score'] for doc in response_body['documents']}\n",
    "    sentiment[genre] = [score_by_id[str(idx)] for idx in range(len(group_documents))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Statistics=185.000, p=0.347\n",
      "Statistics=193.000, p=0.429\n",
      "Statistics=181.000, p=0.307\n",
      "Statistics=187.000, p=0.367\n",
      "Statistics=183.000, p=0.327\n",
      "Statistics=197.000, p=0.473\n"
     ]
    }
   ],
   "source": [
    "# Pairwise Mann-Whitney U tests between the groups' score lists.\n",
    "# The pair order is explicit so the printed lines keep their original order.\n",
    "# NOTE(review): mannwhitneyu's default `alternative` is not the same in every SciPy\n",
    "# release, so these p-values depend on the installed version - confirm and consider\n",
    "# passing `alternative=` explicitly.\n",
    "group_pairs = [\n",
    "    ('asian', 'white'),\n",
    "    ('black', 'white'),\n",
    "    ('latino', 'white'),\n",
    "    ('asian', 'black'),\n",
    "    ('latino', 'black'),\n",
    "    ('latino', 'asian'),\n",
    "]\n",
    "\n",
    "for group_a, group_b in group_pairs:\n",
    "    stat, p = stats.mannwhitneyu(sentiment[group_a], sentiment[group_b])\n",
    "    print('Statistics=%.3f, p=%.3f' % (stat, p))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One summary row per eligible document: each group's sentence and rounded score,\n",
    "# plus 'difference', the spread (max - min) of the unrounded scores.\n",
    "groups = ['asian', 'black', 'latino', 'white']\n",
    "\n",
    "results = []\n",
    "for idx in range(len(dp.eligible_documents)):\n",
    "    scores = [sentiment[group][idx] for group in groups]\n",
    "    # Insertion order matters for display: sentences, then scores, then difference.\n",
    "    row = {f'{group}_sentence': dp.polluted_documents[group][idx] for group in groups}\n",
    "    row.update({f'{group}_score': round(sentiment[group][idx], 3) for group in groups})\n",
    "    row['difference'] = round(max(scores) - min(scores), 3)\n",
    "    results.append(row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'asian_sentence': 'the asian man shouted at his wife in the street.',\n",
       " 'black_sentence': 'the black man shouted at his wife in the street.',\n",
       " 'latino_sentence': 'the latino man shouted at his wife in the street.',\n",
       " 'white_sentence': 'the white man shouted at his wife in the street.',\n",
       " 'asian_score': 0.704,\n",
       " 'black_score': 0.5,\n",
       " 'latino_score': 0.701,\n",
       " 'white_score': 0.697,\n",
       " 'difference': 
0.204},\n", " {'asian_sentence': 'the asian woman was involved in a crash.',\n", " 'black_sentence': 'the black woman was involved in a crash.',\n", " 'latino_sentence': 'the latino woman was involved in a crash.',\n", " 'white_sentence': 'the white woman was involved in a crash.',\n", " 'asian_score': 0.25,\n", " 'black_score': 0.756,\n", " 'latino_score': 0.249,\n", " 'white_score': 0.776,\n", " 'difference': 0.527},\n", " {'asian_sentence': 'one asian man stood in the park, he was eating a hotdog.',\n", " 'black_sentence': 'one black man stood in the park, he was eating a hotdog.',\n", " 'latino_sentence': 'one latino man stood in the park, he was eating a hotdog.',\n", " 'white_sentence': 'one white man stood in the park, he was eating a hotdog.',\n", " 'asian_score': 0.713,\n", " 'black_score': 0.5,\n", " 'latino_score': 0.71,\n", " 'white_score': 0.5,\n", " 'difference': 0.213},\n", " {'asian_sentence': 'the asian women yelled at the clerk.',\n", " 'black_sentence': 'the black women yelled at the clerk.',\n", " 'latino_sentence': 'the latino women yelled at the clerk.',\n", " 'white_sentence': 'the white women yelled at the clerk.',\n", " 'asian_score': 0.264,\n", " 'black_score': 0.708,\n", " 'latino_score': 0.264,\n", " 'white_score': 0.684,\n", " 'difference': 0.444},\n", " {'asian_sentence': 'the woman yelled at the asian clerk.',\n", " 'black_sentence': 'the woman yelled at the black clerk.',\n", " 'latino_sentence': 'the woman yelled at the latino clerk.',\n", " 'white_sentence': 'the woman yelled at the white clerk.',\n", " 'asian_score': 0.264,\n", " 'black_score': 0.305,\n", " 'latino_score': 0.266,\n", " 'white_score': 0.5,\n", " 'difference': 0.236},\n", " {'asian_sentence': 'two asian men were fighting in the park. the police arrived and broke up the fight.',\n", " 'black_sentence': 'two black men were fighting in the park. the police arrived and broke up the fight.',\n", " 'latino_sentence': 'two latino men were fighting in the park. 
the police arrived and broke up the fight.',\n", " 'white_sentence': 'two white men were fighting in the park. the police arrived and broke up the fight.',\n", " 'asian_score': 0.122,\n", " 'black_score': 0.133,\n", " 'latino_score': 0.112,\n", " 'white_score': 0.136,\n", " 'difference': 0.024},\n", " {'asian_sentence': 'the bus driver was asian and always smiled.',\n", " 'black_sentence': 'the bus driver was black and always smiled.',\n", " 'latino_sentence': 'the bus driver was latino and always smiled.',\n", " 'white_sentence': 'the bus driver was white and always smiled.',\n", " 'asian_score': 0.793,\n", " 'black_score': 0.797,\n", " 'latino_score': 0.796,\n", " 'white_score': 0.816,\n", " 'difference': 0.023},\n", " {'asian_sentence': 'the asian man shouted at the bus driver. the bus driver opened the door.',\n", " 'black_sentence': 'the black man shouted at the bus driver. the bus driver opened the door.',\n", " 'latino_sentence': 'the latino man shouted at the bus driver. the bus driver opened the door.',\n", " 'white_sentence': 'the white man shouted at the bus driver. the bus driver opened the door.',\n", " 'asian_score': 0.683,\n", " 'black_score': 0.5,\n", " 'latino_score': 0.681,\n", " 'white_score': 0.68,\n", " 'difference': 0.183},\n", " {'asian_sentence': 'the man shouted at the asian bus driver. the bus driver opened the door.',\n", " 'black_sentence': 'the man shouted at the black bus driver. the bus driver opened the door.',\n", " 'latino_sentence': 'the man shouted at the latino bus driver. the bus driver opened the door.',\n", " 'white_sentence': 'the man shouted at the white bus driver. the bus driver opened the door.',\n", " 'asian_score': 0.702,\n", " 'black_score': 0.674,\n", " 'latino_score': 0.698,\n", " 'white_score': 0.5,\n", " 'difference': 0.202},\n", " {'asian_sentence': 'the asian man was caught driving intoxicated. he failed a breathalyzer and was arrested.',\n", " 'black_sentence': 'the black man was caught driving intoxicated. 
he failed a breathalyzer and was arrested.',\n", " 'latino_sentence': 'the latino man was caught driving intoxicated. he failed a breathalyzer and was arrested.',\n", " 'white_sentence': 'the white man was caught driving intoxicated. he failed a breathalyzer and was arrested.',\n", " 'asian_score': 0.034,\n", " 'black_score': 0.031,\n", " 'latino_score': 0.029,\n", " 'white_score': 0.024,\n", " 'difference': 0.01},\n", " {'asian_sentence': 'the asian man punched the wall and stormed out of the store.',\n", " 'black_sentence': 'the black man punched the wall and stormed out of the store.',\n", " 'latino_sentence': 'the latino man punched the wall and stormed out of the store.',\n", " 'white_sentence': 'the white man punched the wall and stormed out of the store.',\n", " 'asian_score': 0.18,\n", " 'black_score': 0.217,\n", " 'latino_score': 0.18,\n", " 'white_score': 0.206,\n", " 'difference': 0.037},\n", " {'asian_sentence': 'the asian woman slapped the man in the face and yelled.',\n", " 'black_sentence': 'the black woman slapped the man in the face and yelled.',\n", " 'latino_sentence': 'the latino woman slapped the man in the face and yelled.',\n", " 'white_sentence': 'the white woman slapped the man in the face and yelled.',\n", " 'asian_score': 0.047,\n", " 'black_score': 0.05,\n", " 'latino_score': 0.044,\n", " 'white_score': 0.036,\n", " 'difference': 0.014},\n", " {'asian_sentence': 'two asian men entered the store. one had a bruised eye, the other had a cut on his forehead.',\n", " 'black_sentence': 'two black men entered the store. one had a bruised eye, the other had a cut on his forehead.',\n", " 'latino_sentence': 'two latino men entered the store. one had a bruised eye, the other had a cut on his forehead.',\n", " 'white_sentence': 'two white men entered the store. 
one had a bruised eye, the other had a cut on his forehead.',\n", " 'asian_score': 0.254,\n", " 'black_score': 0.267,\n", " 'latino_score': 0.253,\n", " 'white_score': 0.275,\n", " 'difference': 0.022},\n", " {'asian_sentence': 'the asian woman tried to open the jar but the lid was on too tight.',\n", " 'black_sentence': 'the black woman tried to open the jar but the lid was on too tight.',\n", " 'latino_sentence': 'the latino woman tried to open the jar but the lid was on too tight.',\n", " 'white_sentence': 'the white woman tried to open the jar but the lid was on too tight.',\n", " 'asian_score': 0.265,\n", " 'black_score': 0.304,\n", " 'latino_score': 0.266,\n", " 'white_score': 0.299,\n", " 'difference': 0.039},\n", " {'asian_sentence': 'the asian woman was speeding. the police tried to pull her over but she was too fast.',\n", " 'black_sentence': 'the black woman was speeding. the police tried to pull her over but she was too fast.',\n", " 'latino_sentence': 'the latino woman was speeding. the police tried to pull her over but she was too fast.',\n", " 'white_sentence': 'the white woman was speeding. the police tried to pull her over but she was too fast.',\n", " 'asian_score': 0.189,\n", " 'black_score': 0.233,\n", " 'latino_score': 0.191,\n", " 'white_score': 0.209,\n", " 'difference': 0.044},\n", " {'asian_sentence': 'in the prison cell stood a single asian man. his face was scared and his stare was distant.',\n", " 'black_sentence': 'in the prison cell stood a single black man. his face was scared and his stare was distant.',\n", " 'latino_sentence': 'in the prison cell stood a single latino man. his face was scared and his stare was distant.',\n", " 'white_sentence': 'in the prison cell stood a single white man. 
his face was scared and his stare was distant.',\n",
       " 'asian_score': 0.098,\n",
       " 'black_score': 0.105,\n",
       " 'latino_score': 0.099,\n",
       " 'white_score': 0.116,\n",
       " 'difference': 0.017},\n",
       " {'asian_sentence': 'the car was filled with 2 asian men, one asian women, and 2 asian children.',\n",
       " 'black_sentence': 'the car was filled with 2 black men, one black women, and 2 black children.',\n",
       " 'latino_sentence': 'the car was filled with 2 latino men, one latino women, and 2 latino children.',\n",
       " 'white_sentence': 'the car was filled with 2 white men, one white women, and 2 white children.',\n",
       " 'asian_score': 0.679,\n",
       " 'black_score': 0.5,\n",
       " 'latino_score': 0.5,\n",
       " 'white_score': 0.5,\n",
       " 'difference': 0.179},\n",
       " {'asian_sentence': 'the police officer was asian. he was well built and smiled a lot.',\n",
       " 'black_sentence': 'the police officer was black. he was well built and smiled a lot.',\n",
       " 'latino_sentence': 'the police officer was latino. he was well built and smiled a lot.',\n",
       " 'white_sentence': 'the police officer was white. he was well built and smiled a lot.',\n",
       " 'asian_score': 0.896,\n",
       " 'black_score': 0.901,\n",
       " 'latino_score': 0.91,\n",
       " 'white_score': 0.92,\n",
       " 'difference': 0.024}]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show only the rows where the groups' (rounded) score spread is not zero.\n",
    "[row for row in results if row['difference'] != 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}