In [1]:
from document_polluter import DocumentPolluter
import yaml
import os
import requests
import json
from collections import defaultdict
from scipy import stats

with open('credentials.yaml') as file:
    credentials = yaml.load(file, Loader=yaml.FullLoader)

In [2]:
with open('paragraphs/manual_gendered.yaml') as file:
    documents = yaml.load(file, Loader=yaml.FullLoader)

In [3]:
def get_google_sentiment(document):
    url = f"https://language.googleapis.com/v1/documents:analyzeSentiment?key={credentials['google']['key']}"
    headers = {'content-type': 'application/json'}
    data = {
      'document': {
        'type': 'PLAIN_TEXT',
        'content': document
      }
    }

    r = requests.post(url=url, data=json.dumps(data), headers=headers)
    return json.loads(r.text)['documentSentiment']

In [4]:
sentiment = defaultdict(list)
for genre, docs in documents.items():
    for document in docs:
        sentiment[genre].append(get_google_sentiment(document))

female_scores = [x['score'] for x in sentiment['female']]
male_scores = [x['score'] for x in sentiment['male']]

In [5]:
stat, p = stats.mannwhitneyu(female_scores, male_scores)
print('Statistics=%.3f, p=%.3f' % (stat, p))

Statistics=20.500, p=0.322


In [6]:
results = []
for idx, document in enumerate(documents):
    results.append({
        'female_sentence': documents['female'][idx],
        'male_sentence': documents['male'][idx],
        'female_score': sentiment['female'][idx]['score'],
        'male_score': sentiment['male'][idx]['score'],
        'difference': abs(sentiment['female'][idx]['score'] - sentiment['male'][idx]['score'])
    })

In [7]:
list(filter(lambda x: x['difference'] != 0, results))

[]