{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Game recommendation on GOG.com\n", "\n", "## Loading data" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "games 3778\n", "reviewed games 2256\n" ] } ], "source": [ "import json, glob\n", "\n", "games = json.load(open('games.json'))\n", "reviews = {}\n", "for filepath in glob.glob('reviews/*.json'):\n", " game = filepath.replace('reviews/', '').replace('.json', '')\n", " reviews[game] = json.load(open(filepath))\n", "\n", "print('games', len(games))\n", "print('reviewed games', len(reviews))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Similarity metric\n", "\n", "`similarity` returns the Jaccard distance between the reviewer sets of two games: 0 means identical reviewers, 1 means no reviewer in common, so lower values mean more similar games." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def similarity(game1, game2):\n", " game1_users = set(review['reviewer']['username'] for review in reviews[game1])\n", " game2_users = set(review['reviewer']['username'] for review in reviews[game2])\n", " \n", " if len(game1_users | game2_users) == 0:\n", " return 1\n", " \n", " return 1 - len(game1_users & game2_users) / len(game1_users | game2_users)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Top 3 most similar games for popular games" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "diablo\n", " > warcraft_2_battlenet_edition 0.9855855855855856\n", " > elder_scrolls_iv_oblivion_game_of_the_year_edition_deluxe_the 0.9897260273972602\n", " > blade_runner 0.990521327014218\n", "firewatch\n", " > what_remains_of_edith_finch 0.9571428571428572\n", " > the_vanishing_of_ethan_carter 0.9788732394366197\n", " > gone_home 0.9804560260586319\n", "legend_of_grimrock\n", " > legend_of_grimrock_2 0.9655172413793104\n", " > the_book_of_unwritten_tales 0.9795081967213115\n", " > gothic_3 0.9857142857142858\n", "elex\n", " > seven_the_days_long_gone 
0.9786324786324786\n", " > divinity_original_sin_enhanced_edition 0.9805194805194806\n", " > kingdom_come_deliverance 0.9821428571428571\n", "deus_ex\n", " > deus_ex_invisible_war 0.937037037037037\n", " > system_shock_2 0.9742268041237113\n", " > star_wars_knights_of_the_old_republic 0.9787234042553191\n", "dungeon_keeper\n", " > dungeon_keeper_2 0.9769820971867008\n", " > jade_empire_special_edition 0.9844236760124611\n", " > nox 0.9870466321243523\n", "dungeon_keeper_2\n", " > dungeon_keeper 0.9769820971867008\n", " > jade_empire_special_edition 0.9844236760124611\n", " > theme_hospital 0.9872773536895675\n", "total_anihilation_commander_pack\n", " > total_annihilation_kingdoms 0.9723320158102767\n", " > dark_reign_expansion 0.9834710743801653\n", " > infested_planet 0.9855072463768116\n", "the_witcher\n", " > dragon_age_origins 0.9821882951653944\n", " > vampire_the_masquerade_bloodlines 0.9840425531914894\n", " > alan_wake 0.9873015873015873\n", "sid_meiers_alpha_centauri\n", " > wing_commander_4_the_price_of_freedom 0.9881422924901185\n", " > wing_commander_3_heart_of_the_tiger 0.9891696750902527\n", " > heroes_of_might_and_magic_5_bundle 0.9894366197183099\n" ] } ], "source": [ "for game, _ in list(sorted(reviews.items(), key=lambda game: -len(game[1])))[:10]:\n", " sims = [(other_game, similarity(game, other_game)) for other_game in reviews if other_game != game]\n", " sims.sort(key=lambda x: x[1])\n", " print(game)\n", " for other_game, sim in sims[:3]:\n", " if sim < 1:\n", " print(' >', other_game, sim)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Top 20 most similar games to Red Faction (old-school FPS)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "red_faction\n", " > red_faction_2 0.8867924528301887\n", " > stalker_clear_sky 0.9565217391304348\n", " > serious_sam_the_first_encounter 0.9634146341463414\n", " > sea_dogs 0.9672131147540983\n", " 
> call_of_juarez 0.967741935483871\n", " > serious_sam_the_second_encounter 0.967741935483871\n", " > terminal_velocity 0.9682539682539683\n", " > hogs_of_war 0.9692307692307692\n", " > syndicate_wars 0.9714285714285714\n", " > quake_4 0.971830985915493\n", " > tomb_raider_the_angel_of_darkness 0.9722222222222222\n", " > abandon_ship 0.975609756097561\n", " > indiana_jones_and_the_emperors_tomb 0.9759036144578314\n", " > sniper_ghost_warrior_3 0.9761904761904762\n", " > delta_force_land_warrior 0.9767441860465116\n", " > star_wolves_3_civil_war 0.9767441860465116\n", " > unholy_heights 0.9777777777777777\n", " > judge_dredd_dredd_vs_death 0.9777777777777777\n", " > brothers_in_arms_hells_highway 0.9782608695652174\n", " > wing_commander_armada 0.9782608695652174\n" ] } ], "source": [ "game = 'red_faction'\n", "sims = [(other_game, similarity(game, other_game)) for other_game in reviews if other_game != game]\n", "sims.sort(key=lambda x: x[1])\n", "print(game)\n", "for other_game, sim in sims[:20]:\n", " print(' >', other_game, sim)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plotting that on a map" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/damien/.venv/lib/python3.6/site-packages/umap/umap_.py:1495: UserWarning: custom distance metric does not return gradient; inverse_transform will be unavailable. To enable using inverse_transform method method, define a distance function that returns a tuple of (distance [float], gradient [np.array])\n", " \"custom distance metric does not return gradient; inverse_transform will be unavailable. \"\n" ] }, { "data": { "text/html": [ "\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(root) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", "\n", " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", " root._bokeh_onload_callbacks = [];\n", " root._bokeh_is_loading = undefined;\n", " }\n", "\n", " var JS_MIME_TYPE = 'application/javascript';\n", " var HTML_MIME_TYPE = 'text/html';\n", " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", " var CLASS_NAME = 'output_bokeh rendered_html';\n", "\n", " /**\n", " * Render data to the DOM node\n", " */\n", " function render(props, node) {\n", " var script = document.createElement(\"script\");\n", " node.appendChild(script);\n", " }\n", "\n", " /**\n", " * Handle when an output is cleared or removed\n", " */\n", " function handleClearOutput(event, handle) {\n", " var cell = handle.cell;\n", "\n", " var id = cell.output_area._bokeh_element_id;\n", " var server_id = cell.output_area._bokeh_server_id;\n", " // Clean up Bokeh references\n", " if (id != null && id in Bokeh.index) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", "\n", " if (server_id !== undefined) {\n", " // Clean up Bokeh references\n", " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", " cell.notebook.kernel.execute(cmd, {\n", " iopub: {\n", " output: function(msg) {\n", " var id = msg.content.text.trim();\n", " if (id in Bokeh.index) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", " }\n", " }\n", " });\n", " // Destroy server and session\n", " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", " cell.notebook.kernel.execute(cmd);\n", " }\n", " }\n", "\n", " /**\n", " * Handle when a new output is added\n", " */\n", " 
function handleAddOutput(event, handle) {\n", " var output_area = handle.output_area;\n", " var output = handle.output;\n", "\n", " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", " return\n", " }\n", "\n", " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", "\n", " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", " // store reference to embed id on output_area\n", " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", " }\n", " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", " var bk_div = document.createElement(\"div\");\n", " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", " var script_attrs = bk_div.children[0].attributes;\n", " for (var i = 0; i < script_attrs.length; i++) {\n", " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", " toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n", " }\n", " // store reference to server id on output_area\n", " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", " }\n", " }\n", "\n", " function register_renderer(events, OutputArea) {\n", "\n", " function append_mime(data, metadata, element) {\n", " // create a DOM node to render to\n", " var toinsert = this.create_output_subarea(\n", " metadata,\n", " CLASS_NAME,\n", " EXEC_MIME_TYPE\n", " );\n", " this.keyboard_manager.register_events(toinsert);\n", " // Render to node\n", " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", " render(props, toinsert[toinsert.length - 1]);\n", " element.append(toinsert);\n", " return toinsert\n", " }\n", "\n", " /* Handle when an output is cleared or removed */\n", " 
events.on('clear_output.CodeCell', handleClearOutput);\n", " events.on('delete.Cell', handleClearOutput);\n", "\n", " /* Handle when a new output is added */\n", " events.on('output_added.OutputArea', handleAddOutput);\n", "\n", " /**\n", " * Register the mime type and append_mime function with output_area\n", " */\n", " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", " /* Is output safe? */\n", " safe: true,\n", " /* Index of renderer in `output_area.display_order` */\n", " index: 0\n", " });\n", " }\n", "\n", " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", " if (root.Jupyter !== undefined) {\n", " var events = require('base/js/events');\n", " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", "\n", " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", " register_renderer(events, OutputArea);\n", " }\n", " }\n", "\n", " \n", " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", " root._bokeh_timeout = Date.now() + 5000;\n", " root._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"
\\n\"+\n", " \"\\n\"+\n",
" \"from bokeh.resources import INLINE\\n\"+\n",
" \"output_notebook(resources=INLINE)\\n\"+\n",
" \"
\\n\"+\n",
" \"\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"
\\n\"+\n \"