{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from ipywidgets import widgets, interact\n", "\n", "wine = pd.read_csv('wdc_cleaned.csv')\n", "\n", "# I only want wine I can get today so let's exclude futures\n", "wine = wine[wine['notes'] != 'Futures Pre-Sale']\n", "\n", "# now let's remove duplicates: only keep the cheapest of each (nameyear, rating)\n", "wine['id'] = wine['nameyear'] + str(wine['rating']) # + wine['rater'] - can't decide whether to add this\n", "wine = wine.sort_values(by=['id', 'price_per_750'])\n", "wine['duplicate'] = (wine['id'] == wine['id'].shift())\n", "wine = wine[~wine['duplicate']]\n", "wine = wine.drop(['id', 'duplicate'], axis = 1)\n", "\n", "# I've also decided to drop wine.com and Wine&Spirits from the list of reviewers\n", "# due to high correlation with price and also the bottom 4 by number of reviews\n", "wine = wine[~wine['rater'].isin(['wine.com', 'Wine & Spirits', 'Tim Atkin', 'Tasting Panel', 'Conn Guide', 'IWC'])]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Top 12 Best Value for Fixed Cost\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a9e5f31253994435acab825d7cd20a66", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(Dropdown(description='Max Price:', index=2, options=(('$10', 10), ('$20', 20), ('$30', 3…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def best_at_price(max_cost):\n", " # return the highest rated, lowest priced wines\n", " wine['price_inv'] = wine['price_per_750']**(-1)\n", " wine_sel = wine[wine['price_per_750']<=max_cost].sort_values(by=['rating', 'price_inv'], ascending=False).iloc[: 12]\n", " print('-'*75)\n", " print(f'Highest rated wines below ${max_cost} per bottle:')\n", " print('.'*75)\n", " for row in wine_sel.index:\n", " prt_str = '{0}, {1} for ${2:.2f}, rated {3} by {4}'\n", " print(prt_str.format(wine_sel.loc[row, 'nameyear'], wine_sel.loc[row, 'country'], \n", " wine_sel.loc[row, 'price_per_750'], wine_sel.loc[row, 'rating'], \n", " wine_sel.loc[row, 'rater']))\n", " print('-'*75)\n", "\n", "costs = [10, 20, 30, 40, 50, 75, 100, 250, 5000]\n", "cost_w = widgets.Dropdown(\n", " description='Max Price:',\n", " options=[(f'${i}',i) for i in costs],\n", " value=30 \n", ")\n", "_ = interact(best_at_price, max_cost=cost_w)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Top 12 Cheapest Wines for Fixed Rating\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2e188f6c34534807a56a29cd47473beb", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(Dropdown(description='Min Rating:', index=2, options=(88, 89, 90, 91, 92, 93, 94, 95, 96…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def cheap_at_rating(rat):\n", " wine_sel = wine[wine['rating']>=rat].sort_values(by=['price_per_750']).iloc[: 12]\n", " print('-'*75)\n", " print(f'Cheapest wines rated at least {rat} per bottle:')\n", " print('.'*75)\n", " for row in wine_sel.index:\n", " prt_str = '{0}, {1} for ${2:.2f}, rated {3} by {4}'\n", " print(prt_str.format(wine_sel.loc[row, 'nameyear'], wine_sel.loc[row, 'country'], \n", " wine_sel.loc[row, 'price_per_750'], wine_sel.loc[row, 'rating'], \n", " wine_sel.loc[row, 'rater']))\n", " print('-'*75)\n", "\n", "ratings = list(range(wine['rating'].min(),101))\n", "rat_w = widgets.Dropdown(\n", " description='Min Rating:',\n", " options=ratings,\n", " value=90 \n", ")\n", "_ = interact(cheap_at_rating, rat=rat_w)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Top 12 Best Value Score per Region\n", "\n", "I define 'value' as the percentage discount to expected price. 'Expected price' is determined by a linear regression of price vs rating across all the wines in the sample. I've decided to exclude wines above $500 and ratings below 90 in this sample." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5ab3fa0f0ed14a6b897f6408ed148a78", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(Dropdown(description='Country:', options=('All', 'USA', 'Italy', 'Argentina', 'France', …" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# For the value analysis I want to drop the most expensive wines and the lowest ratings\n", "wine = wine[wine['price_per_750']<=500]\n", "wine = wine[wine['rating']>=90]\n", "\n", "# First let's do a price vs. rating linear regression across all wines\n", "X = wine['rating'].to_numpy()\n", "Y = wine['price_per_750'].to_numpy()\n", "beta_0 = Y.mean()\n", "rat_mean = X.mean()\n", "Y = Y - beta_0\n", "X = X - X.mean()\n", "beta = X.T @ Y / (X.T @ X)\n", "\n", "def est_price(rating):\n", " return beta_0 + beta*(rating - rat_mean)\n", "\n", "wine['est_price'] = wine['rating'].apply(est_price)\n", "wine['value_idx'] = (wine['est_price'] - wine['price_per_750']) / wine['est_price']\n", "\n", "def subregion_list(ctry):\n", " temp_s = wine[wine['country']==ctry]['subregion']\n", " temp_s = temp_s[~temp_s.isnull()]\n", " return temp_s.unique().tolist()\n", "\n", "def update_sub_w(change):\n", " if ctry_w.value == 'All':\n", " sub_w.options = ['All']\n", " else:\n", " sub_w.options = ['All'] + subregion_list(ctry_w.value)\n", " best_in_regn(change, change)\n", " \n", "def best_in_regn(ctry, subr):\n", " if ctry_w.value == 'All':\n", " wine_sel = wine\n", " else: \n", " if sub_w.value == 'All':\n", " wine_sel = wine[wine['country']==ctry_w.value]\n", " else:\n", " wine_sel = wine[(wine['country']==ctry_w.value) & (wine['subregion']==sub_w.value)]\n", " wine_sel = wine_sel.sort_values(by=['value_idx'], ascending=False).iloc[: 12]\n", " print('-'*75)\n", " print(f'Best value wines from {ctry_w.value}, {sub_w.value}:')\n", " print('.'*75)\n", " for row in wine_sel.index:\n", " prt_str = '{0}, {1} for ${2:.2f}, rated {3} by {4}'\n", " print(prt_str.format(wine_sel.loc[row, 'nameyear'], wine_sel.loc[row, 'country'], \n", " wine_sel.loc[row, 'price_per_750'], wine_sel.loc[row, 'rating'], \n", " wine_sel.loc[row, 'rater']))\n", " print('-'*75)\n", "\n", "ctry_w = widgets.Dropdown(\n", " description='Country:',\n", " options=['All'] + wine['country'].unique().tolist(),\n", " value='All'\n", ")\n", "sub_w = widgets.Dropdown(\n", " description='Region:',\n", " options=['All'],\n", " value='All' \n", ")\n", "ctry_w.observe(update_sub_w, 'value')\n", "_ = interact(best_in_regn, ctry=ctry_w, subr=sub_w)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" } }, "nbformat": 4, "nbformat_minor": 4 }