\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namesimilarity
author
4976703 alexanderwales 0.384615
5118664 daystar721 0.333333
3989854 Sir Poley 0.222222
4767519 Scientist's Thesis 0.187500
3344060 Velorien 0.166667
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ " name similarity\n", "author \n", "4976703 alexanderwales 0.384615\n", "5118664 daystar721 0.333333\n", "3989854 Sir Poley 0.222222\n", "4767519 Scientist's Thesis 0.187500\n", "3344060 Velorien 0.166667" ] } ], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Scoring Stories\n", "\n", "Calculating the weighted average of all stories by author similarity" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# The sum of the similarity of every author who has favourited this story + the writer's similarity\n", "stories[\"sim_total\"] = authors.ix[stories[\"author\"]][\"similarity\"].values\n", "\n", "# The total number of times this story has been favourited + written (1)\n", "stories[\"sim_count\"] = 1\n", "\n", "for author in authors.iterrows():\n", " author_favs = favourite_stories.get(author[0], Series())\n", " stories.loc[author_favs, \"sim_total\"] += author[1][\"similarity\"]\n", " stories.loc[author_favs, \"sim_count\"] += 1\n", "\n", "stories[\"sim_score\"] = stories[\"sim_total\"].div(stories[\"sim_count\"])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Stories by average score" ] }, { "cell_type": "code", "collapsed": false, "input": [ "stories.sort(\"sim_score\", ascending=False)[:5][[\"title\", \"sim_total\", \"sim_count\", \"sim_score\"]]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlesim_totalsim_countsim_score
story
10327510 A Bluer Shade of White 0.384615 1 0.384615
10023949 Harry Potter and the Philosopher\\'s Zombie 0.717949 2 0.358974
9676374 Daystar\\'s Remix of Rationality 0.333333 1 0.333333
9794740 Pokemon: The Origin of Species 0.967949 4 0.241987
9658524 Branches on the Tree of Time 0.469361 2 0.234681
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ " title sim_total sim_count \\\n", "story \n", "10327510 A Bluer Shade of White 0.384615 1 \n", "10023949 Harry Potter and the Philosopher\\'s Zombie 0.717949 2 \n", "9676374 Daystar\\'s Remix of Rationality 0.333333 1 \n", "9794740 Pokemon: The Origin of Species 0.967949 4 \n", "9658524 Branches on the Tree of Time 0.469361 2 \n", "\n", " sim_score \n", "story \n", "10327510 0.384615 \n", "10023949 0.358974 \n", "9676374 0.333333 \n", "9794740 0.241987 \n", "9658524 0.234681 " ] } ], "prompt_number": 9 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Stories by total similarity" ] }, { "cell_type": "code", "collapsed": false, "input": [ "stories.sort(\"sim_total\", ascending=False)[:5][[\"title\", \"sim_total\", \"sim_count\", \"sim_score\"]]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlesim_totalsim_countsim_score
story
8096183 Harry Potter and the Natural 20 1.573355 18 0.087409
5782108 Harry Potter and the Methods of Rationality 1.486540 18 0.082586
9794740 Pokemon: The Origin of Species 0.967949 4 0.241987
10023949 Harry Potter and the Philosopher\\'s Zombie 0.717949 2 0.358974
10360716 The Metropolitan Man 0.705458 5 0.141092
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ " title sim_total sim_count \\\n", "story \n", "8096183 Harry Potter and the Natural 20 1.573355 18 \n", "5782108 Harry Potter and the Methods of Rationality 1.486540 18 \n", "9794740 Pokemon: The Origin of Species 0.967949 4 \n", "10023949 Harry Potter and the Philosopher\\'s Zombie 0.717949 2 \n", "10360716 The Metropolitan Man 0.705458 5 \n", "\n", " sim_score \n", "story \n", "8096183 0.087409 \n", "5782108 0.082586 \n", "9794740 0.241987 \n", "10023949 0.358974 \n", "10360716 0.141092 " ] } ], "prompt_number": 10 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Stories by times favourited" ] }, { "cell_type": "code", "collapsed": false, "input": [ "stories.sort(\"sim_count\", ascending=False)[:5][[\"title\", \"sim_total\", \"sim_count\", \"sim_score\"]]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlesim_totalsim_countsim_score
story
8096183 Harry Potter and the Natural 20 1.573355 18 0.087409
5782108 Harry Potter and the Methods of Rationality 1.486540 18 0.082586
2731239 Team 8 0.193825 14 0.013845
5193644 Time Braid 0.683962 13 0.052612
5409165 It\\'s For a Good Cause, I Swear! 0.058134 11 0.005285
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ " title sim_total sim_count \\\n", "story \n", "8096183 Harry Potter and the Natural 20 1.573355 18 \n", "5782108 Harry Potter and the Methods of Rationality 1.486540 18 \n", "2731239 Team 8 0.193825 14 \n", "5193644 Time Braid 0.683962 13 \n", "5409165 It\\'s For a Good Cause, I Swear! 0.058134 11 \n", "\n", " sim_score \n", "story \n", "8096183 0.087409 \n", "5782108 0.082586 \n", "2731239 0.013845 \n", "5193644 0.052612 \n", "5409165 0.005285 " ] } ], "prompt_number": 11 } ], "metadata": {} } ] }