{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sentiment of gendered paragraphs\n",
    "\n",
    "Each paragraph in `paragraphs/manual_gendered.yaml` is sent to the Google Cloud Natural Language `analyzeSentiment` endpoint, and the document-level scores of the `female` and `male` groups are compared.\n",
    "\n",
    "The API key is read from `credentials.yaml` (`google.key`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from collections import defaultdict\n",
    "\n",
    "import requests\n",
    "import yaml\n",
    "from scipy import stats\n",
    "\n",
    "from document_polluter import DocumentPolluter\n",
    "\n",
    "# Credentials live outside the notebook so the API key is never committed with it.\n",
    "with open('credentials.yaml') as file:\n",
    "    credentials = yaml.load(file, Loader=yaml.FullLoader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mapping of group name -> list of paragraphs; later cells read the 'female' and 'male' groups.\n",
    "with open('paragraphs/manual_gendered.yaml') as file:\n",
    "    documents = yaml.load(file, Loader=yaml.FullLoader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "GOOGLE_SENTIMENT_URL = 'https://language.googleapis.com/v1/documents:analyzeSentiment'\n",
    "\n",
    "\n",
    "def get_google_sentiment(document):\n",
    "    \"\"\"Return the document-level sentiment the Google API reports for `document`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    document : str\n",
    "        Text sent to the API as a PLAIN_TEXT document.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        The `documentSentiment` object of the JSON response.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    requests.HTTPError\n",
    "        If the API responds with an HTTP error status.\n",
    "    \"\"\"\n",
    "    payload = {\n",
    "        'document': {\n",
    "            'type': 'PLAIN_TEXT',\n",
    "            'content': document,\n",
    "        }\n",
    "    }\n",
    "    response = requests.post(\n",
    "        GOOGLE_SENTIMENT_URL,\n",
    "        params={'key': credentials['google']['key']},\n",
    "        json=payload,  # serialises the body and sets the JSON content-type header\n",
    "        timeout=30,\n",
    "    )\n",
    "    # Fail loudly on an API error instead of hitting a KeyError on the error body.\n",
    "    response.raise_for_status()\n",
    "    return response.json()['documentSentiment']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One API call per paragraph; results keep the order of the paragraphs in each group.\n",
    "sentiment = defaultdict(list)\n",
    "for genre, docs in documents.items():\n",
    "    sentiment[genre].extend(get_google_sentiment(doc) for doc in docs)\n",
    "\n",
    "female_scores = [entry['score'] for entry in sentiment['female']]\n",
    "male_scores = [entry['score'] for entry in sentiment['male']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Statistics=20.500, p=0.322\n"
     ]
    }
   ],
   "source": [
    "# NOTE(review): `alternative` is not passed, so the p-value follows the installed SciPy's\n",
    "# default, which differs between SciPy releases - confirm one- vs two-sided before citing it.\n",
    "stat, p = stats.mannwhitneyu(female_scores, male_scores)\n",
    "print(f'Statistics={stat:.3f}, p={p:.3f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The two groups are compared position by position, so the lists must line up.\n",
    "assert len(documents['female']) == len(documents['male']), (\n",
    "    'female and male paragraph lists must have the same length'\n",
    ")\n",
    "\n",
    "# Walk the paired entries themselves; iterating `documents` directly would only\n",
    "# yield its keys, i.e. one row per group rather than one row per paragraph pair.\n",
    "results = [\n",
    "    {\n",
    "        'female_sentence': female_doc,\n",
    "        'male_sentence': male_doc,\n",
    "        'female_score': female_sent['score'],\n",
    "        'male_score': male_sent['score'],\n",
    "        'difference': abs(female_sent['score'] - male_sent['score']),\n",
    "    }\n",
    "    for female_doc, male_doc, female_sent, male_sent in zip(\n",
    "        documents['female'], documents['male'], sentiment['female'], sentiment['male']\n",
    "    )\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pairs whose female and male scores are not identical.\n",
    "[row for row in results if row['difference'] != 0]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}