{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## importing data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import codecs\n",
"import json\n",
"import os, sys\n",
"\n",
"from collections import defaultdict\n",
"from multiprocessing import Pool as ThreadPool\n",
"\n",
"from IPython.display import display, HTML\n",
"\n",
"import pandas as pd\n",
"\n",
"sys.path.append(os.path.abspath('../../WKP-python-toolkit'))\n",
"import wekeypedia"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"inflections = defaultdict(dict)\n",
"\n",
"ignore_list = \"{}()[]<>./,;\\\"':!?=*&%\"\n",
"\n",
"def from_file(name):\n",
" diff_txt = \"\"\n",
"\n",
" with codecs.open(name, \"r\", encoding=\"utf-8-sig\") as f:\n",
" data = json.load(f)\n",
"\n",
" return data\n",
"\n",
"def list_revisions(page):\n",
" return os.listdir(\"data/%s\" % (page))\n",
"\n",
"def revision_stems(revision_filename):\n",
" p = wekeypedia.WikipediaPage()\n",
" # print revision_filename\n",
" rev = from_file(revision_filename)\n",
" \n",
" # extract diff text\n",
" diff = rev[\"diff\"][\"*\"]\n",
" \n",
" # bug with Ethics#462124891\n",
" if diff == False:\n",
" return { \"added\": {}, \"deleted\": {} }\n",
" \n",
" diff = p.extract_plusminus(diff)\n",
"\n",
" # count stems by added/deleted\n",
" stems = {}\n",
" stems[\"added\"] = p.count_stems(diff[\"added\"], inflections)\n",
" stems[\"deleted\"] = p.count_stems(diff[\"deleted\"], inflections)\n",
" # p.print_plusminus_terms_overview(stems)\n",
" \n",
" return stems\n",
"\n",
"def source_stems(s):\n",
" p = wekeypedia.WikipediaPage(s)\n",
" \n",
" revisions = list_revisions(s)\n",
"\n",
" result = {\n",
" \"added\": defaultdict(dict),\n",
" \"deleted\": defaultdict(dict) }\n",
" \n",
" print \"%s: %s revisions\" % (s, len(revisions))\n",
" \n",
" i = 0\n",
"\n",
" for r in revisions:\n",
" i += 1\n",
" print \"\\rrevisions: %s (%s/%s)\" % (r, i, len(revisions),),\n",
" stems = revision_stems(\"data/%s/%s\" % (s, r))\n",
" \n",
" for x in [\"added\", \"deleted\"]:\n",
" for stem in stems[x].iteritems():\n",
" result[x].setdefault(stem[0], 0)\n",
" result[x][stem[0]] += stem[1]\n",
" print \"\\r \",\n",
" return result\n",
"\n",
"def to_df(a):\n",
" df_add = pd.DataFrame([ [ x[1] ] for x in a[\"added\"].iteritems() ], index=a[\"added\"].keys())\n",
" df_add.columns = [ 'added' ]\n",
"\n",
" df_del = pd.DataFrame([ [ x[1] ] for x in a[\"deleted\"].iteritems() ], index=a[\"deleted\"].keys())\n",
" df_del.columns = [ 'deleted' ]\n",
"\n",
" df = df_add.join(df_del, how=\"outer\")\n",
" \n",
" return df"
]
},
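{
"cell_type": "markdown",
"metadata": {},
"source": [
"Quick sanity check of the helpers on a single revision file (this assumes `data/Love/` is already populated with downloaded revision diffs, as used below): load one file and look at its most-added stems."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# sketch: run the extraction on one stored revision of \"Love\"\n",
"# assumes data/Love/ exists and contains revision diff files\n",
"sample_revisions = list_revisions(\"Love\")\n",
"if sample_revisions:\n",
"    sample = revision_stems(\"data/%s/%s\" % (\"Love\", sample_revisions[0]))\n",
"    # ten most frequently added stems in this single diff\n",
"    print sorted(sample[\"added\"].iteritems(), key=lambda kv: -kv[1])[:10]"
]
},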
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def clean_and_compute(df):\n",
" ignore_list = [ \"a\", \"of\", \"and\", \"to\", \"the\", \"is\", \"for\", \"or\" , \"in\", \"that\", \"it\", \"|\", \"ref\",\n",
" \"http\", \"''\", \"``\", \"s\", \"an\", \"-\", \"=\", \"*\", \"==\", \"===\", \"====\", \"name=\", \"nbsp\", \"style=\", \"5px\",\n",
" \"font-siz\", \"|-\", \"--\", \"wikiquot\", \"/ref\", \"'s\" ]\n",
"\n",
" df = df.drop([ w for w in ignore_list if w in df.index ])\n",
" \n",
" df[\"added - deleted\"] = map(lambda x, y: x-y, df[\"added\"], df[\"deleted\"])\n",
" df[\"abs(added - deleted)\"] = map(lambda x, y: abs(x-y), df[\"added\"], df[\"deleted\"])\n",
" df = df.sort([\"abs(added - deleted)\", \"added\"], ascending=[0, 0])\n",
"\n",
" return df"
]
},
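{
"cell_type": "markdown",
"metadata": {},
"source": [
"A toy illustration of what `clean_and_compute` does, on a hand-made dataframe with hypothetical counts: stopwords from the ignore list are dropped and the two derived columns are added."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# hypothetical counts: \"the\" sits in the ignore list and gets dropped,\n",
"# the remaining stems get \"added - deleted\" and its absolute value\n",
"toy = pd.DataFrame({ \"added\": [10, 7, 3], \"deleted\": [4, 9, 3] },\n",
"                   index=[\"love\", \"the\", \"stem\"])\n",
"clean_and_compute(toy)"
]
},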
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Love: 6324 revisions\n",
" \n"
]
}
],
"source": [
"love = source_stems(\"Love\")\n",
"love = to_df(love)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" added | \n",
" deleted | \n",
" added - deleted | \n",
" abs(added - deleted) | \n",
"
\n",
" \n",
" \n",
" \n",
" | love | \n",
" 41315 | \n",
" 40990 | \n",
" 325 | \n",
" 325 | \n",
"
\n",
" \n",
" | be | \n",
" 5886 | \n",
" 5831 | \n",
" 55 | \n",
" 55 | \n",
"
\n",
" \n",
" | god | \n",
" 4074 | \n",
" 4024 | \n",
" 50 | \n",
" 50 | \n",
"
\n",
" \n",
" | with | \n",
" 4237 | \n",
" 4193 | \n",
" 44 | \n",
" 44 | \n",
"
\n",
" \n",
" | by | \n",
" 4147 | \n",
" 4104 | \n",
" 43 | \n",
" 43 | \n",
"
\n",
" \n",
" | one | \n",
" 3169 | \n",
" 3135 | \n",
" 34 | \n",
" 34 | \n",
"
\n",
" \n",
" | cite | \n",
" 556 | \n",
" 522 | \n",
" 34 | \n",
" 34 | \n",
"
\n",
" \n",
" | from | \n",
" 2039 | \n",
" 2007 | \n",
" 32 | \n",
" 32 | \n",
"
\n",
" \n",
" | thi | \n",
" 3509 | \n",
" 3478 | \n",
" 31 | \n",
" 31 | \n",
"
\n",
" \n",
" | which | \n",
" 3357 | \n",
" 3326 | \n",
" 31 | \n",
" 31 | \n",
"
\n",
" \n",
" | not | \n",
" 3376 | \n",
" 3346 | \n",
" 30 | \n",
" 30 | \n",
"
\n",
" \n",
" | on | \n",
" 3083 | \n",
" 3055 | \n",
" 28 | \n",
" 28 | \n",
"
\n",
" \n",
" | are | \n",
" 3558 | \n",
" 3531 | \n",
" 27 | \n",
" 27 | \n",
"
\n",
" \n",
" | other | \n",
" 2959 | \n",
" 2934 | \n",
" 25 | \n",
" 25 | \n",
"
\n",
" \n",
" | use | \n",
" 2679 | \n",
" 2654 | \n",
" 25 | \n",
" 25 | \n",
"
\n",
" \n",
" | also | \n",
" 1620 | \n",
" 1595 | \n",
" 25 | \n",
" 25 | \n",
"
\n",
" \n",
" | human | \n",
" 2025 | \n",
" 2001 | \n",
" 24 | \n",
" 24 | \n",
"
\n",
" \n",
" | word | \n",
" 2679 | \n",
" 2656 | \n",
" 23 | \n",
" 23 | \n",
"
\n",
" \n",
" | romant | \n",
" 2364 | \n",
" 2341 | \n",
" 23 | \n",
" 23 | \n",
"
\n",
" \n",
" | term | \n",
" 1731 | \n",
" 1708 | \n",
" 23 | \n",
" 23 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" added deleted added - deleted abs(added - deleted)\n",
"love 41315 40990 325 325\n",
"be 5886 5831 55 55\n",
"god 4074 4024 50 50\n",
"with 4237 4193 44 44\n",
"by 4147 4104 43 43\n",
"one 3169 3135 34 34\n",
"cite 556 522 34 34\n",
"from 2039 2007 32 32\n",
"thi 3509 3478 31 31\n",
"which 3357 3326 31 31\n",
"not 3376 3346 30 30\n",
"on 3083 3055 28 28\n",
"are 3558 3531 27 27\n",
"other 2959 2934 25 25\n",
"use 2679 2654 25 25\n",
"also 1620 1595 25 25\n",
"human 2025 2001 24 24\n",
"word 2679 2656 23 23\n",
"romant 2364 2341 23 23\n",
"term 1731 1708 23 23"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"love = clean_and_compute(love)\n",
"love.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wisdom: 1634 revisions\n",
" \n"
]
}
],
"source": [
"wisdom = source_stems(\"Wisdom\")\n",
"wisdom = to_df(wisdom)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" added | \n",
" deleted | \n",
" added - deleted | \n",
" abs(added - deleted) | \n",
"
\n",
" \n",
" \n",
" \n",
" | wisdom | \n",
" 3140 | \n",
" 3025 | \n",
" 115 | \n",
" 115 | \n",
"
\n",
" \n",
" | with | \n",
" 1093 | \n",
" 1064 | \n",
" 29 | \n",
" 29 | \n",
"
\n",
" \n",
" | be | \n",
" 699 | \n",
" 675 | \n",
" 24 | \n",
" 24 | \n",
"
\n",
" \n",
" | he | \n",
" 506 | \n",
" 484 | \n",
" 22 | \n",
" 22 | \n",
"
\n",
" \n",
" | cite | \n",
" 138 | \n",
" 116 | \n",
" 22 | \n",
" 22 | \n",
"
\n",
" \n",
" | one | \n",
" 645 | \n",
" 624 | \n",
" 21 | \n",
" 21 | \n",
"
\n",
" \n",
" | wise | \n",
" 619 | \n",
" 599 | \n",
" 20 | \n",
" 20 | \n",
"
\n",
" \n",
" | from | \n",
" 508 | \n",
" 488 | \n",
" 20 | \n",
" 20 | \n",
"
\n",
" \n",
" | by | \n",
" 494 | \n",
" 475 | \n",
" 19 | \n",
" 19 | \n",
"
\n",
" \n",
" | which | \n",
" 252 | \n",
" 234 | \n",
" 18 | \n",
" 18 | \n",
"
\n",
" \n",
" | not | \n",
" 416 | \n",
" 399 | \n",
" 17 | \n",
" 17 | \n",
"
\n",
" \n",
" | are | \n",
" 421 | \n",
" 405 | \n",
" 16 | \n",
" 16 | \n",
"
\n",
" \n",
" | person | \n",
" 308 | \n",
" 292 | \n",
" 16 | \n",
" 16 | \n",
"
\n",
" \n",
" | virtu | \n",
" 394 | \n",
" 379 | \n",
" 15 | \n",
" 15 | \n",
"
\n",
" \n",
" | knowledg | \n",
" 808 | \n",
" 794 | \n",
" 14 | \n",
" 14 | \n",
"
\n",
" \n",
" | who | \n",
" 304 | \n",
" 290 | \n",
" 14 | \n",
" 14 | \n",
"
\n",
" \n",
" | god | \n",
" 270 | \n",
" 256 | \n",
" 14 | \n",
" 14 | \n",
"
\n",
" \n",
" | proverb | \n",
" 97 | \n",
" 83 | \n",
" 14 | \n",
" 14 | \n",
"
\n",
" \n",
" | thi | \n",
" 502 | \n",
" 489 | \n",
" 13 | \n",
" 13 | \n",
"
\n",
" \n",
" | other | \n",
" 375 | \n",
" 362 | \n",
" 13 | \n",
" 13 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" added deleted added - deleted abs(added - deleted)\n",
"wisdom 3140 3025 115 115\n",
"with 1093 1064 29 29\n",
"be 699 675 24 24\n",
"he 506 484 22 22\n",
"cite 138 116 22 22\n",
"one 645 624 21 21\n",
"wise 619 599 20 20\n",
"from 508 488 20 20\n",
"by 494 475 19 19\n",
"which 252 234 18 18\n",
"not 416 399 17 17\n",
"are 421 405 16 16\n",
"person 308 292 16 16\n",
"virtu 394 379 15 15\n",
"knowledg 808 794 14 14\n",
"who 304 290 14 14\n",
"god 270 256 14 14\n",
"proverb 97 83 14 14\n",
"thi 502 489 13 13\n",
"other 375 362 13 13"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wisdom = clean_and_compute(wisdom)\n",
"wisdom.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Morality: 2776 revisions\n",
" \n"
]
}
],
"source": [
"morality = source_stems(\"Morality\")\n",
"morality = to_df(morality)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" added | \n",
" deleted | \n",
" added - deleted | \n",
" abs(added - deleted) | \n",
"
\n",
" \n",
" \n",
" \n",
" | moral | \n",
" 7595 | \n",
" 7421 | \n",
" 174 | \n",
" 174 | \n",
"
\n",
" \n",
" | cite | \n",
" 728 | \n",
" 665 | \n",
" 63 | \n",
" 63 | \n",
"
\n",
" \n",
" | journal | \n",
" 620 | \n",
" 565 | \n",
" 55 | \n",
" 55 | \n",
"
\n",
" \n",
" | on | \n",
" 1577 | \n",
" 1530 | \n",
" 47 | \n",
" 47 | \n",
"
\n",
" \n",
" | be | \n",
" 1934 | \n",
" 1898 | \n",
" 36 | \n",
" 36 | \n",
"
\n",
" \n",
" | with | \n",
" 1347 | \n",
" 1313 | \n",
" 34 | \n",
" 34 | \n",
"
\n",
" \n",
" | religion | \n",
" 956 | \n",
" 923 | \n",
" 33 | \n",
" 33 | \n",
"
\n",
" \n",
" | are | \n",
" 1661 | \n",
" 1629 | \n",
" 32 | \n",
" 32 | \n",
"
\n",
" \n",
" | by | \n",
" 1097 | \n",
" 1066 | \n",
" 31 | \n",
" 31 | \n",
"
\n",
" \n",
" | ethic | \n",
" 1356 | \n",
" 1326 | \n",
" 30 | \n",
" 30 | \n",
"
\n",
" \n",
" | from | \n",
" 1106 | \n",
" 1078 | \n",
" 28 | \n",
" 28 | \n",
"
\n",
" \n",
" | thi | \n",
" 1005 | \n",
" 978 | \n",
" 27 | \n",
" 27 | \n",
"
\n",
" \n",
" | cultur | \n",
" 923 | \n",
" 897 | \n",
" 26 | \n",
" 26 | \n",
"
\n",
" \n",
" | other | \n",
" 780 | \n",
" 756 | \n",
" 24 | \n",
" 24 | \n",
"
\n",
" \n",
" | year | \n",
" 240 | \n",
" 216 | \n",
" 24 | \n",
" 24 | \n",
"
\n",
" \n",
" | have | \n",
" 824 | \n",
" 801 | \n",
" 23 | \n",
" 23 | \n",
"
\n",
" \n",
" | behavior | \n",
" 776 | \n",
" 753 | \n",
" 23 | \n",
" 23 | \n",
"
\n",
" \n",
" | studi | \n",
" 602 | \n",
" 579 | \n",
" 23 | \n",
" 23 | \n",
"
\n",
" \n",
" | page | \n",
" 197 | \n",
" 174 | \n",
" 23 | \n",
" 23 | \n",
"
\n",
" \n",
" | theori | \n",
" 571 | \n",
" 549 | \n",
" 22 | \n",
" 22 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" added deleted added - deleted abs(added - deleted)\n",
"moral 7595 7421 174 174\n",
"cite 728 665 63 63\n",
"journal 620 565 55 55\n",
"on 1577 1530 47 47\n",
"be 1934 1898 36 36\n",
"with 1347 1313 34 34\n",
"religion 956 923 33 33\n",
"are 1661 1629 32 32\n",
"by 1097 1066 31 31\n",
"ethic 1356 1326 30 30\n",
"from 1106 1078 28 28\n",
"thi 1005 978 27 27\n",
"cultur 923 897 26 26\n",
"other 780 756 24 24\n",
"year 240 216 24 24\n",
"have 824 801 23 23\n",
"behavior 776 753 23 23\n",
"studi 602 579 23 23\n",
"page 197 174 23 23\n",
"theori 571 549 22 22"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"morality = clean_and_compute(morality)\n",
"morality.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ethics: 3739 revisions\n",
" \n"
]
}
],
"source": [
"ethics = source_stems(\"Ethics\")\n",
"ethics = to_df(ethics)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" added | \n",
" deleted | \n",
" added - deleted | \n",
" abs(added - deleted) | \n",
"
\n",
" \n",
" \n",
" \n",
" | ethic | \n",
" 10236 | \n",
" 10012 | \n",
" 224 | \n",
" 224 | \n",
"
\n",
" \n",
" | moral | \n",
" 2874 | \n",
" 2800 | \n",
" 74 | \n",
" 74 | \n",
"
\n",
" \n",
" | be | \n",
" 3130 | \n",
" 3066 | \n",
" 64 | \n",
" 64 | \n",
"
\n",
" \n",
" | are | \n",
" 2791 | \n",
" 2744 | \n",
" 47 | \n",
" 47 | \n",
"
\n",
" \n",
" | on | \n",
" 1924 | \n",
" 1877 | \n",
" 47 | \n",
" 47 | \n",
"
\n",
" \n",
" | right | \n",
" 2024 | \n",
" 1982 | \n",
" 42 | \n",
" 42 | \n",
"
\n",
" \n",
" | philosophi | \n",
" 1356 | \n",
" 1318 | \n",
" 38 | \n",
" 38 | \n",
"
\n",
" \n",
" | not | \n",
" 1585 | \n",
" 1548 | \n",
" 37 | \n",
" 37 | \n",
"
\n",
" \n",
" | with | \n",
" 1681 | \n",
" 1645 | \n",
" 36 | \n",
" 36 | \n",
"
\n",
" \n",
" | by | \n",
" 1795 | \n",
" 1761 | \n",
" 34 | \n",
" 34 | \n",
"
\n",
" \n",
" | cite | \n",
" 320 | \n",
" 289 | \n",
" 31 | \n",
" 31 | \n",
"
\n",
" \n",
" | wa | \n",
" 1461 | \n",
" 1431 | \n",
" 30 | \n",
" 30 | \n",
"
\n",
" \n",
" | one | \n",
" 1430 | \n",
" 1401 | \n",
" 29 | \n",
" 29 | \n",
"
\n",
" \n",
" | good | \n",
" 1406 | \n",
" 1377 | \n",
" 29 | \n",
" 29 | \n",
"
\n",
" \n",
" | virtu | \n",
" 782 | \n",
" 753 | \n",
" 29 | \n",
" 29 | \n",
"
\n",
" \n",
" | theori | \n",
" 1752 | \n",
" 1725 | \n",
" 27 | \n",
" 27 | \n",
"
\n",
" \n",
" | thi | \n",
" 1514 | \n",
" 1487 | \n",
" 27 | \n",
" 27 | \n",
"
\n",
" \n",
" | action | \n",
" 831 | \n",
" 804 | \n",
" 27 | \n",
" 27 | \n",
"
\n",
" \n",
" | person | \n",
" 1333 | \n",
" 1307 | \n",
" 26 | \n",
" 26 | \n",
"
\n",
" \n",
" | from | \n",
" 1111 | \n",
" 1087 | \n",
" 24 | \n",
" 24 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" added deleted added - deleted abs(added - deleted)\n",
"ethic 10236 10012 224 224\n",
"moral 2874 2800 74 74\n",
"be 3130 3066 64 64\n",
"are 2791 2744 47 47\n",
"on 1924 1877 47 47\n",
"right 2024 1982 42 42\n",
"philosophi 1356 1318 38 38\n",
"not 1585 1548 37 37\n",
"with 1681 1645 36 36\n",
"by 1795 1761 34 34\n",
"cite 320 289 31 31\n",
"wa 1461 1431 30 30\n",
"one 1430 1401 29 29\n",
"good 1406 1377 29 29\n",
"virtu 782 753 29 29\n",
"theori 1752 1725 27 27\n",
"thi 1514 1487 27 27\n",
"action 831 804 27 27\n",
"person 1333 1307 26 26\n",
"from 1111 1087 24 24"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ethics = clean_and_compute(ethics)\n",
"ethics.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"love.to_csv(\"data/love.terms.csv\", encoding=\"utf-8\")\n",
"wisdom.to_csv(\"data/wisdom.terms.csv\", encoding=\"utf-8\")\n",
"ethics.to_csv(\"data/ethics.terms.csv\", encoding=\"utf-8\")\n",
"morality.to_csv(\"data/morality.terms.csv\", encoding=\"utf-8\")"
]
},
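{
"cell_type": "markdown",
"metadata": {},
"source": [
"The exported tables keep the stems as index, so they can be reloaded later, for example:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# sketch: reload one of the exported term tables, stems as index\n",
"love_reloaded = pd.read_csv(\"data/love.terms.csv\", index_col=0, encoding=\"utf-8\")\n",
"love_reloaded.head()"
]
},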
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" added | \n",
" deleted | \n",
" added - deleted | \n",
" abs(added - deleted) | \n",
"
\n",
" \n",
" \n",
" \n",
" | nature, | \n",
" 1 | \n",
" 7 | \n",
" -6 | \n",
" 6 | \n",
"
\n",
" \n",
" | band | \n",
" 175 | \n",
" 180 | \n",
" -5 | \n",
" 5 | \n",
"
\n",
" \n",
" | love== | \n",
" 160 | \n",
" 165 | \n",
" -5 | \n",
" 5 | \n",
"
\n",
" \n",
" | br | \n",
" 225 | \n",
" 229 | \n",
" -4 | \n",
" 4 | \n",
"
\n",
" \n",
" | 13 | \n",
" 157 | \n",
" 161 | \n",
" -4 | \n",
" 4 | \n",
"
\n",
" \n",
" | februari | \n",
" 44 | \n",
" 48 | \n",
" -4 | \n",
" 4 | \n",
"
\n",
" \n",
" | |love | \n",
" 30 | \n",
" 34 | \n",
" -4 | \n",
" 4 | \n",
"
\n",
" \n",
" | fact|dat | \n",
" 4 | \n",
" 8 | \n",
" -4 | \n",
" 4 | \n",
"
\n",
" \n",
" | food, | \n",
" 2 | \n",
" 6 | \n",
" -4 | \n",
" 4 | \n",
"
\n",
" \n",
" | organizations, | \n",
" 2 | \n",
" 6 | \n",
" -4 | \n",
" 4 | \n",
"
\n",
" \n",
" | g | \n",
" 102 | \n",
" 105 | \n",
" -3 | \n",
" 3 | \n",
"
\n",
" \n",
" | iniqu | \n",
" 9 | \n",
" 12 | \n",
" -3 | \n",
" 3 | \n",
"
\n",
" \n",
" | /b | \n",
" 7 | \n",
" 10 | \n",
" -3 | \n",
" 3 | \n",
"
\n",
" \n",
" | 86 | \n",
" 7 | \n",
" 10 | \n",
" -3 | \n",
" 3 | \n",
"
\n",
" \n",
" | feelings, | \n",
" 5 | \n",
" 8 | \n",
" -3 | \n",
" 3 | \n",
"
\n",
" \n",
" | id | \n",
" 404 | \n",
" 406 | \n",
" -2 | \n",
" 2 | \n",
"
\n",
" \n",
" | me | \n",
" 227 | \n",
" 229 | \n",
" -2 | \n",
" 2 | \n",
"
\n",
" \n",
" | |ero | \n",
" 223 | \n",
" 225 | \n",
" -2 | \n",
" 2 | \n",
"
\n",
" \n",
" | etc | \n",
" 190 | \n",
" 192 | \n",
" -2 | \n",
" 2 | \n",
"
\n",
" \n",
" | patient | \n",
" 173 | \n",
" 175 | \n",
" -2 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" added deleted added - deleted abs(added - deleted)\n",
"nature, 1 7 -6 6\n",
"band 175 180 -5 5\n",
"love== 160 165 -5 5\n",
"br 225 229 -4 4\n",
"13 157 161 -4 4\n",
"februari 44 48 -4 4\n",
"|love 30 34 -4 4\n",
"fact|dat 4 8 -4 4\n",
"food, 2 6 -4 4\n",
"organizations, 2 6 -4 4\n",
"g 102 105 -3 3\n",
"iniqu 9 12 -3 3\n",
"/b 7 10 -3 3\n",
"86 7 10 -3 3\n",
"feelings, 5 8 -3 3\n",
"id 404 406 -2 2\n",
"me 227 229 -2 2\n",
"|ero 223 225 -2 2\n",
"etc 190 192 -2 2\n",
"patient 173 175 -2 2"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"love[ love[\"added - deleted\"] < 0 ].head(20)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}