{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "fleet-turkish",
   "metadata": {},
   "source": [
    "### Question one (a)\n",
    "\n",
    "Think about what could be going wrong with our calculation. Think about a better way to evaluate this data. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "occasional-strand",
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.simplefilter(action='ignore', category=FutureWarning)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "vulnerable-usage",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "choice-routine",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>order_amount</th>\n",
       "      <th>total_items</th>\n",
       "      <th>payment_method</th>\n",
       "      <th>created_at</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>53</td>\n",
       "      <td>746</td>\n",
       "      <td>224</td>\n",
       "      <td>2</td>\n",
       "      <td>cash</td>\n",
       "      <td>2017-03-13 12:36:56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>92</td>\n",
       "      <td>925</td>\n",
       "      <td>90</td>\n",
       "      <td>1</td>\n",
       "      <td>cash</td>\n",
       "      <td>2017-03-03 17:38:52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>44</td>\n",
       "      <td>861</td>\n",
       "      <td>144</td>\n",
       "      <td>1</td>\n",
       "      <td>cash</td>\n",
       "      <td>2017-03-14 4:23:56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>18</td>\n",
       "      <td>935</td>\n",
       "      <td>156</td>\n",
       "      <td>1</td>\n",
       "      <td>credit_card</td>\n",
       "      <td>2017-03-26 12:43:37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>18</td>\n",
       "      <td>883</td>\n",
       "      <td>156</td>\n",
       "      <td>1</td>\n",
       "      <td>credit_card</td>\n",
       "      <td>2017-03-01 4:35:11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   order_id  shop_id  user_id  order_amount  total_items payment_method  \\\n",
       "0         1       53      746           224            2           cash   \n",
       "1         2       92      925            90            1           cash   \n",
       "2         3       44      861           144            1           cash   \n",
       "3         4       18      935           156            1    credit_card   \n",
       "4         5       18      883           156            1    credit_card   \n",
       "\n",
       "            created_at  \n",
       "0  2017-03-13 12:36:56  \n",
       "1  2017-03-03 17:38:52  \n",
       "2   2017-03-14 4:23:56  \n",
       "3  2017-03-26 12:43:37  \n",
       "4   2017-03-01 4:35:11  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"./2019 Winter Data Science Intern Challenge Data Set - Sheet1.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "crazy-maple",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5000 entries, 0 to 4999\n",
      "Data columns (total 7 columns):\n",
      " #   Column          Non-Null Count  Dtype \n",
      "---  ------          --------------  ----- \n",
      " 0   order_id        5000 non-null   int64 \n",
      " 1   shop_id         5000 non-null   int64 \n",
      " 2   user_id         5000 non-null   int64 \n",
      " 3   order_amount    5000 non-null   int64 \n",
      " 4   total_items     5000 non-null   int64 \n",
      " 5   payment_method  5000 non-null   object\n",
      " 6   created_at      5000 non-null   object\n",
      "dtypes: int64(5), object(2)\n",
      "memory usage: 273.6+ KB\n"
     ]
    }
   ],
   "source": [
    "#check for missing values, there are no missimg values :)\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "metropolitan-smoke",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['cash', 'credit_card', 'debit'], dtype=object)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#check payment methods\n",
    "df[\"payment_method\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "solid-chorus",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_amount</th>\n",
       "      <th>total_items</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>5000.000000</td>\n",
       "      <td>5000.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>3145.128000</td>\n",
       "      <td>8.78720</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>41282.539349</td>\n",
       "      <td>116.32032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>90.000000</td>\n",
       "      <td>1.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>163.000000</td>\n",
       "      <td>1.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>284.000000</td>\n",
       "      <td>2.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>390.000000</td>\n",
       "      <td>3.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>704000.000000</td>\n",
       "      <td>2000.00000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        order_amount  total_items\n",
       "count    5000.000000   5000.00000\n",
       "mean     3145.128000      8.78720\n",
       "std     41282.539349    116.32032\n",
       "min        90.000000      1.00000\n",
       "25%       163.000000      1.00000\n",
       "50%       284.000000      2.00000\n",
       "75%       390.000000      3.00000\n",
       "max    704000.000000   2000.00000"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#check statistics of the data\n",
    "df[[\"order_amount\", \"total_items\"]].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "european-salem",
   "metadata": {},
   "source": [
    "from the output, it can be easily noticed that mean value is extremely high compared to others values. The minimum value is 90 while the 25th, 50th, and 75th percentile are 163, 284 and 390 respectively. with 704000 been the maximum value we can assume that extremely large values (outliers) like this are what is overstating the value of the mean.\n",
    "\n",
    "A boxplot will show a more detailed view of the outliers in the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "deadly-support",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEJCAYAAABhbdtlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAhTUlEQVR4nO3dfbwVZb338c83QCVNBd23xwDFkgyz8mHfSkan7AGxLKxXD5on0ENyOpbVqe6yqNvSLHt4ZekpuzliQqeDmmWQacRBtMgwNmkgorE1DUgFBZ+PBvq7/5jf1mG1Zu21lf2w8Pt+vdZrz/zmmrmuNWvW/NZ1zey1FBGYmZnV84L+boCZmQ1cThJmZlbJScLMzCo5SZiZWSUnCTMzq+QkYWZmlZwkniVJ10r6YB/X+UVJ/9kH9dwp6c29uP19JD0iaVBv1VFTX689n/JxIOlESb/ahtteKekNOb1NX3tJn5N04bbaXg/qfaekNfn6H9IL2+/z9+X2zkmiRh5kmyTt2N9teS56+0T/XETEXyJil4h4cltvW9LFkr68rbfbjIj4UURM6K5cs22MiFdExLXPtV2S3iBpbc22vxIR/XEy/SbwkXz9b+yH+rc7kk6StLi3tu8kUSJpNPA6IIB39G9r7PlK0uD+bkMv2hdY+Vw3osI2O39t5/v8OXGS2NpkYAlwMTClifIvlfR7SQ9JmitpeNcCSe/I4YIHsncyNuOfkXRD10Ep6V+z3E6SRksKSdMk/VXS3ZI+VVV5gzp+COwD/Dy79Z+us+6ekq7MdTdK+k3Nm+5gScslPSjpUkk7ldY9RVJnrjdP0osz/iVJ5+f0EEmPSvpGzg+V9Lik4aXn2bUPrpV0lqTfSnpY0q8k7Vmqb7KkuyTdL+kLVb0kSdOAE4FP5/P+eZPP51hJN+W+uF7Sqxrs87dIujW38++ASsue/kSXJ7FzJa3P42OFpIOq2pjP6TOSlgOPShpc53nulG1/WNIfJL26VHdI2r80f7GkL0vaGbgaeHHW94ikF6tm+KrqWCq17VNV+69m/7xA0ufz9Vovabak3STtKOkRYBDwR0m3V6x/pKSlWc9SSUeWll0r6WxJvwUeA17S6PXIdf5Z0ioVowPzJe1bs88+LGk1sLqiPT+WdE9u/9eSXlGzj78n6ercr7+V9A+Svp313arSkJqksfkcHsh9/Y7Ssq2GyVTTO8i2fkjS6lz/u3mMjQW+D7wm2/BAvefxnESEH/kAOoFTgcOAzcBeDcpeC6wDDgJ2Bn4C/GcuexnwKPAWYAjw6dz2DhSJ+dfAF4ExwCbgkFxvNEUvZk5u85XABuDNufyLzdSRy+/sWq+i/V+lOLiG5ON1gErr/h54MTAcWAV8KJe9EbgPOBTYETgf+HVp2YqcPhK4HbihtOyPNc9zcGlf3p7PaWjOn5PLDgQeAcbn/vtmvjZ1nxtFgv9yTazR8zkEWA8cQXECm5Lld6yz7T2Bh4F35z77N2AL8MFcfhKwOKePBpYBu1OcuMYCe3fTxpuAUcDQ2tcwX/vNpbo/BfwZGJLLA9i/3n4A3gCsranvi/TsWKq7/+rso3/OdV8C7AL8FPhhaflW7axZdzjF++EDwGDghJzfo3Sc/AV4RS5v6+b1mJRtGZvlPw9cX9OWBVnv0AbP50UUx/q3gZtq9vF9FOeLnYBr8jWZTHEsfRlYlGWHZFs+R3EcvzHbfkDpuX2wtO2TyGOp1NYrKY6nfSjOCxPrld3WD/ckkqTxFF3hyyJiGcVJ6/3drPbDiLg5Ih4FvgC8V8XF2PcBv4iIBRGxmeLENhQ4MiKeojiIPgrMA74efz82+6WIeDQiVgA/oHiz1Kqso8mnvBnYG9g3IjZHxG8ij7h0XkT8NSI2Aj8HDs74icBFEfGHiHgC+CzFp5jRwO+AMZL2AP4RmAmMkLQL8Hrgugbt+UFE/Cki/ge4rFTfu4GfR8TiiPgb8H8p3jA9VfV8pgH/LyJuiIgnI2IW8AQwrs423gqsjIjLc59/G7inor7NFCeXl1Mk31URcXcTbVyT+6CeZaW6v0VxYqrXzp5q5liq2n+1TgS+FRF3RMQjFMfH8WpuOOdtwOqI+GFEbImIOcCtwNtLZS6OiJURsQU4hsavx4eAr+a+3wJ8haJHuW+pzFcjYmPVPo+IiyLi4TzWvwi8WtJupSJXRMSyiHgcuAJ4PCJmR3G97VKKDyFQvE67UHz4+VtEXENx0q/33q5yTkQ8EBF/ARZR/RpsU04Sz5gC/Coi7sv5/6L7Iac1pem7KD4t7EnxieuurgWZGNYAI3L+TooXeTTw3Sa2++I6ZRrW0YRvUHyy+ZWkOySdXrO8/GZ7jOIAr1fvI8D9wIh8o3VQJIR/pEgK1wOvpfsk0ai+p/dHRDyW9fVU1fb3BT6ZXfgHsrs+iup9Xm5LsPVrRWnZNcC/U7y+6yXNkLRrN22su616y/P1XlvRzp5q5liq2n8Nt5XTg4G9etqO0vrldpT3UXevx77Ad0qv60aKXl3V9rYiaZCkcyTdLukhih4VFO/xLveWpv+nzvxWx3Hu26rn1p1mX4NtykmCYrwceC/w+hx/vIei6/rq8rhvHaNK0/tQfHq8D/grxQHatX1l2XU5/zbgNcBCipN1d9v9a50yDeugm0/b+enokxHxEoqL9J+Q9KZG61TUuzOwR6ne6yi60ocAS3P+aOBwimG2nrobGFmqb2jWV6WnvYw1wNkRsXvp8cL8FFuvLU+/NqV9Xr8hEedFxGEUQ2YvA/5PN23sru3lul9AsV+6jo3HgBeWyv5DD7bb3bHUE1tti+L43cLWJ89m1+1av9yO8nPp7vVYA/xLzWs7NCKur9herfdTDFm9GdiN4kMd1Fz3aNJfgVHa+rpf+bk9SvXr151e/SpvJ4nCccCTFG/mg/MxFvgNxdBQlX+SdKCkFwJnApdnN/My4G2S3iRpCPBJiiGM61VckL0Q+CBFT+Xtkt5as90vSHphXiQ7maLbWquyjlx+L8W4cF0qLtbun2+sB/P5P1VVvmQOcLKkg1XcJvwViusOd+by6yj22S05PHRtPtc/R8SGJrZf63KKfXSkpB0ouvyN3qQNn3cd/wF8SNIReSFwZ0lvk/SiOmV/AbxC0rty+OSjVLyZJf3v3OYQihPA4zyzf3vaxi6Hler+OMXrvSSX3QS8Pz/9TqTouXW5F9ijZpikrLtjqSfmAP8mab8cZvwKcGkO93TnKuBlkt6v4sL9+yjek1dWlO/u9fg+8Nmui80qLqC/pwfP5UUU++F+ihP4V3qwbq0bKBL5p1Xc1PEGimG0S3L5TcC78n2/PzC1B9u+FxiZ749tzkmiMIViTPwvEXFP14NiuODEBuOpP6S4eHUPxfjwRwEi4jbgnygu6t5HcTC8PU+aM4C5EXFVRNxPcTBcmOP4Xa6jGApaCHwzIv7uH7S6qQOKC9Ofz652vTukxgD/TXFR+HfA9yJiUXc7KiL+m+L6y08oPsm9FDi+VOR6ivHsrl7DLRQnyGfTiyAiVgKnUbyZ7s72rqd489YzEzgwn/fPmth+B3AKxWu9iWK/n1RR9j7gPcA5FCeOMcBvKza9K0UC2kQxrHA/z/Qae9TGkrkU1w+6Lu6+K8fiAT5GcQw8QHFd4OntRsStFCfvO7LOrYaomjiWeuIiivfFryku4j5O8fp1K98Px1IkqfspLqAfWxoCri3f8PWIiCuArwGX5HDRzRTXMZo1m+K1W0dxHC9pXLxa7su3Z/33Ad8DJudrA3Au8DeKE/4s4Ec92Pw1FLcV3yOp7r56LrruZrEBIC/+dt2x0swnr+ed/HT6ADAmIv7cz80x2+65J2EDnqS3Zzd8Z4o7b1bwzEVEM+tFThLWCiZRXPj7K8WQwvHhLrBZn/Bwk5mZVXJPwszMKm13X2q15557xujRo/u7GWZmLWXZsmX3RURbbXy7SxKjR4+mo6Ojv5thZtZSJNX+tzvg4SYzM2vAScLMzCo5SZiZWSUnCTMzq+QkYWZmlba7u5vMBprii3a35n9itVbRbU9C0gEqfv+36/GQpI+r+K3iBSp+c3WBpGFZXpLOU/EbyMslHVra1pQsv1rSlFL8MBW/AdyZ6yrjdeswaxXlBPGzn/2sbtxsIOs2SUTEbRFxcEQcTPFbro9R/Ezf6cDCiBhD8ZXWXb9sdgzF9+uMofhpyAugOOEDZ1D8lvDhwBmlk/4FFF/X3LXexIxX1WHWUiKCSZMmuQdhLaen1yTeBNweEXdRfOnarIzPovjhHjI+OwpLgN0l7U3x62QL8vdkN1H8APnEXLZrRCzJL22bXbOtenWYtYxyD6LevNlA1tMkcTzFj5cA7FX6Yfd7eOY3bEew9e/Grs1Yo/jaOvFGdWxF0jRJHZI6Nmx4Nj9+ZtZ7jjvuuIbzZgNZ00kifxrvHcCPa5dlD6BX+9GN6oiIGRHRHhHtbW1/99UjZv1OEnPnzvW1CGs5PelJHAP8ISK6ftD83hwqIv+uz/g6tv4x8pEZaxQfWSfeqA6zllC+BlHuQfjahLWKniSJE3hmqAlgHsVvQ5N/55bik/Mup3HAgzlkNB+YIGlYXrCeAMzPZQ9JGpd3NU2u2Va9OsxaRkT83cOsVTT1fxL5s5FvAf6lFD4HuEzSVIofC39vxq8C3krxg/KPAScDRMRGSWcBS7PcmRGxMadPBS4GhgJX56NRHWZm1ge2u1+ma29vD39VuJlZz0haFhHttXF/LYeZmVVykjAzs0pOEmZmVslJwszMKjlJmJlZJScJMzOr5CRhZmaVnCTMzKySk4SZmVVykjAzs0pOEmZmVslJwszMKjlJmJlZJScJMzOr5CRhZmaVnCTMzKySk4SZmVVykjAzs0pOEmZmVqmpJCFpd0mXS7pV0ipJr5E0XNICSavz77AsK0nnSeqUtFzSoaXtTMnyqyVNKcUPk7Qi1zlPkjJetw4zM+sbzfYkvgP8MiJeDrwaWAWcDiyMiDHAwpwHOAYYk49pwAVQnPCBM4AjgMOBM0on/QuAU0rrTcx4VR1mZtYHuk0SknYD/hGYCRARf4uIB4BJwKwsNgs4LqcnAbOjsATYXdLewNHAgojYGBGbgAXAxFy2a0QsiYgAZtdsq14dZmbWB5rpSewHbAB+IOlGSRdK2hnYKyLuzjL3AHvl9AhgTWn9tRlrFF9bJ06DOszMrA80kyQGA4cCF0TEIcCj1Az7ZA8gtn3zmqtD0jRJHZI6NmzY0JvNMDN7XmkmSawF1kbEDTl/OUXSuDeHisi/63P5OmBUaf2RGWsUH1knToM6thIRMyKiPSLa29ramnhKZmbWjG6TRETcA6yRdECG3gTcAswDuu5QmgLMzel5wOS8y2kc8GAOGc0HJkgalhesJwDzc9lDksblXU2Ta7ZVrw4zM+sDg5ssdxrwI0k7AHcAJ1MkmMskTQXuAt6bZa8C3gp0Ao9lWSJio6SzgKVZ7syI2JjTpwIXA0OBq/MBcE5FHWZm1gdUDPVvP9rb26Ojo6O/m2Fm1lIkLYuI9tq4/+PazMwqOUmYmVklJwkzM6vkJGFmZpWcJMzMrJKThJmZVXKSMDOzSk4SZmZWyUnCzMwqOUmYmVklJwkzM6vkJGFmZpWcJMzMrJKThJmZVXKSMDOzSk4SZmZWyUnCzMwqOUmYmVklJwkzM6vUVJKQdKekFZJuktSRseGSFkhanX+HZVySzpPUKWm5pENL25mS5VdLmlKKH5bb78x11agOMzPrGz3pSRwVEQeXfij7dGBhRIwBFuY8wDHAmHxMAy6A4oQPnAEcARwOnFE66V8AnFJab2I3dZiZWR94LsNNk4BZOT0LOK4Unx2FJcDukvYGjgYWRMTGiNgELAAm5rJdI2JJRAQwu2Zb9eowM7M+0GySCOBXkpZJmpaxvSLi7py+B9grp0cAa0rrrs1Yo/jaOvFGdZiZWR8Y3GS58RGxTtL/AhZIurW8MCJCUmz75jVXRyauaQD77LNPbzbDzOx5pameRESsy7/rgSsorincm0NF5N/1WXwdMKq0+siMNYqPrBOnQR217ZsREe0R0d7W1tbMUzIzsyZ0myQk7SzpRV3TwATgZmAe0HWH0hRgbk7PAybnXU7jgAdzyGg+MEHSsLxgPQGYn8sekjQu72qaXLOtenWYmVkfaGa4aS/girwrdTDwXxHxS0lLgcskTQXuAt6b5a8C3gp0Ao8BJwNExEZJZwFLs9yZEbExp08FLgaGAlfnA+CcijrMzKwPqLihaPvR3t4eHR0d/d0MM7OWImlZ6V8cnub/uDYzs0pOEmZmVslJwszMKjlJmJlZJScJMzOr5CRhZmaVnCTMzKySk4SZmVVykjAzs0pOEmZmVslJwszMKjlJmJlZJScJMzOr5CRhZmaVnCTMzKySk4SZmVVykjAzs0pOEmZmVslJwszMKjWdJCQNknSjpCtzfj9JN0jqlHSppB0yvmPOd+by0aVtfDbjt0k6uhSfmLFOSaeX4nXrMDOzvtGTnsTHgFWl+a8B50bE/sAmYGrGpwKbMn5ulkPSgcDxwCuAicD3MvEMAr4LHAMcCJyQZRvVYWZmfaCpJCFpJPA24MKcF/BG4PIsMgs4Lqcn5Ty5/E1ZfhJwSUQ8ERF/BjqBw/PRGRF3RMTfgEuASd3UYWZmfaDZnsS3gU8DT+X8HsADEbEl59cCI3J6BLAGIJc/mOWfjtesUxVvVIeZmfWBbpOEpGOB9RGxrA/a86xImiapQ1LHhg0b+rs5ZmbbjWZ6Eq8F3iHpToqhoDcC3wF2lzQ4y4wE1uX0OmAUQC7fDbi/HK9Zpyp+f4M6thIRMyKiPSLa29ramnhKZmbWjG6TRER8NiJGRsRoigvP10TEicAi4N1ZbAowN6fn5Ty5/JqIiIwfn3c/7QeMAX4PLAXG5J1MO2Qd83KdqjrMzKwPPJf/k/gM8AlJnRTXD2ZmfCawR8Y/AZwOEBErgcuAW4BfAh+OiCfzmsNHgPkUd09dlmUb1WFmZn1AxQf27Ud7e3t0dHT0dzPMzFqKpGUR0V4b939cm5lZJScJMzOr5CRhZmaVnCTMzKySk4SZmVVykjAzs0pOEmZmVslJwszMKjlJmJlZJScJMzOr5CRhZmaVnCTMzKySk4SZmVVykjAzs0pOEmZmVslJwszMKjlJmJlZJScJMzOr5CRhZmaVnCTMzKxSt0lC0k6Sfi/pj5JWSvpSxveTdIOkTkmXStoh4zvmfGcuH13a1mczfpuko0vxiRnrlHR6KV63DjMz6xvN9CSeAN4YEa8GDgYmShoHfA04NyL2BzYBU7P8VGBTxs/Nckg6EDgeeAUwEfiepEGSBgHfBY4BDgROyLI0qMPMzPpAt0kiCo/k7JB8BPBG4PKMzwKOy+lJOU8uf5MkZfySiHgiIv4MdAKH56MzIu6IiL8BlwCTcp2qOszMrA80dU0iP/HfBKwHFgC3Aw9ExJYsshYYkdMjgDUAufxBYI9yvGadqvgeDeqobd80SR2SOjZs2NDMUzIzsyY0lSQi4smIOBgYSfHJ/+W92aieiogZEdEeEe1tbW393Rwzs+1Gj+5uiogHgEXAa4DdJQ3ORSOBdTm9DhgFkMt3A+4vx2vWqYrf36AOMzPrA83c3dQmafecHgq8BVhFkSzencWmAHNzel7Ok8uviYjI+PF599N+wBjg98BSYEzeybQDxcXteblOVR1mZtYHBndfhL2BWXkX0guAyyLiSkm3AJdI+jJwIzAzy88EfiipE9hIcdInIlZKugy4BdgCfDgingSQ9BFgPjAIuCgiVua2PlNRh5mZ9QEVH9i3H+3t7dHR0dHfzTAzaymSlkVEe23c/3FtZmaVnCTMzKySk4SZmVVykjAzs0pOEmZmVslJwszMKjlJmJlZJScJMzOr5CRhZmaVnCTMzKySk4SZmVVykjAzs0pOEmZmVslJwszMKjlJmJlZJScJMzOr5CRhZmaVnCTMzKySk4SZmVXqNklIGiVpkaRbJK2U9LGMD5e0QNLq/Dss45J0nqROScslHVra1pQsv1rSlFL8MEkrcp3zJKlRHWZm1jea6UlsAT4ZEQcC44APSzoQOB1YGBFjgIU5D3AMMCYf04ALoDjhA2cARwCHA2eUTvoXAKeU1puY8ao6zMysD3SbJCLi7oj4Q04/DKwCRgCTgFlZbBZwXE5PAmZHYQmwu6S9gaOBBRGxMSI2AQuAibls14hYEhEBzK7ZVr06zMysD/TomoSk0cAhwA3AXhFxdy66B9grp0cAa0qrrc1Yo/jaOnEa1FHbrmmSOiR1bNiwoSdPyczMGmg6SUjaBfgJ8PGIeKi8LHsAsY3btpVGdUTEjIhoj4j2tra23myGmdnzSlNJQtIQigTxo4j4aYbvzaEi8u/6jK8DRpVWH5mxRvGRdeKN6jAzsz7QzN1NAmYCqyLiW6VF84CuO5SmAHNL8cl5l9M44MEcMpoPTJA0LC9YTwDm57KHJI3LuibXbKteHWZm1gcGN1HmtcAHgBWSbsrY54BzgMskTQXuAt6by64C3gp0Ao8BJwNExEZJZwFLs9yZEbExp08FLgaGAlfngwZ1mJlZH1Ax1L/9aG9vj46Ojv5uhplZS5G0LCLaa+P+j2szM6vkJGFmZpWcJMzMrJKThJmZVXKSMOtlc+bM4aCDDmLQoEEcdNBBzJkzp7+bZNa0Zm6BNbNnac6cOUyfPp2ZM2cyfvx4Fi9ezNSpUwE44YQT+rl1Zt3zLbBmveiggw7i/PPP56ijjno6tmjRIk477TRuvvnmfmyZ2daqboF1kjDrRYMGDeLxxx9nyJAhT8c2b97MTjvtxJNPPtmPLTPbmv9PwqwfjB07lsWLF28VW7x4MWPHju2nFpn1jJOEWS+aPn06U6dOZdGiRWzevJlFixYxdepUpk+f3t9NM2uKL1yb9aKui9OnnXYaq1atYuzYsZx99tm+aG0tw9ckzMzM1yTMzKznnCTMzKySk4SZmVVykjAzs0pOEmZmVslJwszMKjlJmPUyfwustbJuk4SkiyStl3RzKTZc0gJJq/PvsIxL0nmSOiUtl3RoaZ0pWX61pCml+GGSVuQ650lSozrMWknXt8Cef/75PP7445x//vlMnz7dicJaRjM9iYuBiTWx04GFETEGWJjzAMcAY/IxDbgAihM+cAZwBHA4cEbppH8BcEppvYnd1GHWMs4++2xmzpzJUUcdxZAhQzjqqKOYOXMmZ599dn83zawp3SaJiPg1sLEmPAmYldOzgONK8dlRWALsLmlv4GhgQURsjIhNwAJgYi7bNSKWRPGv37NrtlWvDrOWsWrVKsaPH79VbPz48axataqfWmTWM8/2msReEXF3Tt8D7JXTI4A1pXJrM9YovrZOvFEdf0fSNEkdkjo2bNjwLJ6OWe/wt8Baq3vOF66zB9CrXwDVXR0RMSMi2iOiva2trTebYtYj/hZYa3XP9ltg75W0d0TcnUNG6zO+DhhVKjcyY+uAN9TEr834yDrlG9Vh1jL8LbDW6p5tT2Ie0HWH0hRgbik+Oe9yGgc8mENG84EJkoblBesJwPxc9pCkcXlX0+SabdWrw6ylTJ48mZUrV/LUU0+xcuVKJk+e3N9NMmtaM7fAzgF+Bxwgaa2kqcA5wFskrQbenPMAVwF3AJ3AfwCnAkTERuAsYGk+zswYWebCXOd24OqMV9Vh1jKGDBnCli1bGDZsGMuXL2fYsGFs2bJlq58zNRvI/HsSZr1IEsOGDWPjxmduEBw+fDibNm1ie3vvWWvz70mY9ZPrrruu4bzZQOYkYdbLXv/61zecNxvInCTMetHgwYPZtGkTw4cPZ8WKFU8PNQ0e7J+Xt9bgI9WsF23evJkhQ4awadMmXvWqVwFF4ti8eXM/t8ysOU4SZr3MCcFamZOEWS/LLzbeiu9sslbhaxJmvaicIK688sq6cbOBzD0Jsz7Q1XOICCcIaynuSZj1snIPot682UDm/7g260VdvYby+6xezKy/+T+uzfqRJH7xi194qMlajpOEWS8q9xaOPfbYunGzgcxJwszMKjlJmPWi8vDSJZdcUjduNpA5SZj1gYjgfe97n4eZrOU4SZj1snIPot682UDmW2DNepFvgbVW4VtgzfqRJC699FJfi7CW4yRh1ovKvYXjjz++btxsIBvwSULSREm3SeqUdHp/t8fM7PlkQCcJSYOA7wLHAAcCJ0g6sH9bZda88vDSy172srpxs4FsoH8L7OFAZ0TcASDpEmAScEu/tsqsh+pduDZrBQP67iZJ7wYmRsQHc/4DwBER8ZGactOAaQD77LPPYXfddVeft9Va2ytnvbK/m7DNrJiyor+bYC2o6u6mgd6TaEpEzABmQHELbD83x1pQb51YfQustboBfU0CWAeMKs2PzJhZS5HEAQcc4KEmazkDPUksBcZI2k/SDsDxwLx+bpNZ08q9hT/96U9142YD2YAeboqILZI+AswHBgEXRcTKfm6WWY84IVgrG9BJAiAirgKu6u92mJk9Hw304SYzM+tHThJmZlbJScLMzCo5SZiZWaUB/R/Xz4akDYD/5doGoj2B+/q7EWYV9o2IttrgdpckzAYqSR31vvbAbCDzcJOZmVVykjAzs0pOEmZ9Z0Z/N8Csp3xNwszMKrknYWZmlZwkzMyskpOEmZlVcpIwq0PSFyV9qr/bsS1J+lx/t8Faj5OEPe+p8JzeC5IG/NfuA04S1mNOEva8IOkTkm7Ox8cljZZ0m6TZwM3AKEnTJf1J0mLggNK6L5X0S0nLJP1G0sszfrGk70u6Afh6Rb2HS/qdpBslXS/pgIyfJOlnkhZIulPSR7KNN0paIml4ljs455dLukLSsIxfK6k9p/eUdGdpuz/N9q6W9PWMnwMMlXSTpB/1zl627VJE+OHHdv0ADgNWADsDuwArgUOAp4BxNWVeCOwKdAKfymULgTE5fQRwTU5fDFwJDGpQ967A4Jx+M/CTnD4p63gR0AY8CHwol50LfDynlwOvz+kzgW/n9LVAe07vCdxZ2u4dwG7AThTfYzYqlz3S36+FH633aIUustlzNR64IiIeBZD0U+B1wF0RsSTLvC7LPJZl5uXfXYAjgR9L6trejqVt/zginmxQ927ALEljgACGlJYtioiHgYclPQj8POMrgFdJ2g3YPSKuy/gs4MdNPN+FEfFgtv8WYF9gTRPrmf0dJwl7Pnu0iTIvAB6IiIOf5TbOokgG75Q0mqIH0OWJ0vRTpfmn6P69uYVnhot3qllW3u6TTWzLrJKvSdjzwW+A4yS9UNLOwDszVvbrLDNU0ouAtwNExEPAnyW9B56+yP3qHtS9G7Aup0/qSaOzN7BJ0usy9AGgq1dxJ8UQGcC7m9zkZklDui9m9gwnCdvuRcQfKK4f/B64AbgQ2FSnzKXAH4GrgaWlxScCUyX9keJ6xqQeVP914KuSbuTZfaKfAnxD0nLgYIrrEgDfBP41t7tnk9uaASz3hWvrCX93k5mZVXJPwszMKvmCltk2IOlk4GM14d9GxIf7oz1m24qHm8zMrJKHm8zMrJKThJmZVXKSMDOzSk4SZmZW6f8DYAUFZT+T6AYAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df[\"order_amount\"].plot(kind=\"box\",) \n",
    "plt.title(\"A boxplot showing the distribution of order amount\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "modified-president",
   "metadata": {},
   "source": [
    "The boxplot shows there are alot of outliers, the line seems to be on 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "hydraulic-upset",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_amount</th>\n",
       "      <th>total_items</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>704000</td>\n",
       "      <td>2000</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>154350</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>273</th>\n",
       "      <td>102900</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>272</th>\n",
       "      <td>77175</td>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>271</th>\n",
       "      <td>51450</td>\n",
       "      <td>2</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>270</th>\n",
       "      <td>25725</td>\n",
       "      <td>1</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>269</th>\n",
       "      <td>1760</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>268</th>\n",
       "      <td>1408</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>267</th>\n",
       "      <td>1086</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>266</th>\n",
       "      <td>1064</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     order_amount  total_items  count\n",
       "275        704000         2000     17\n",
       "274        154350            6      1\n",
       "273        102900            4      1\n",
       "272         77175            3      9\n",
       "271         51450            2     16\n",
       "270         25725            1     19\n",
       "269          1760            5      1\n",
       "268          1408            4      2\n",
       "267          1086            6      1\n",
       "266          1064            8      1"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders = df.groupby([\"order_amount\", \"total_items\"]).size(\n",
    "            ).reset_index(name=\"count\").sort_values(by=\"order_amount\", ascending=False)\n",
    "orders.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "handled-reservoir",
   "metadata": {},
   "source": [
    "Digging deep into the data by grouping shows that a number of values are extremely high when compared to the rest of the values. Take a look at 704000 we can see from the table that the items ordered made the amount large because if the amount is divided by the total items it shows that each sneaker cost 352. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "established-royalty",
   "metadata": {},
   "outputs": [],
   "source": [
    "max_order_per_item = df[\"order_amount\"] / df['total_items']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "dated-opportunity",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>order_amount</th>\n",
       "      <th>total_items</th>\n",
       "      <th>payment_method</th>\n",
       "      <th>created_at</th>\n",
       "      <th>max_order_per_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>53</td>\n",
       "      <td>746</td>\n",
       "      <td>224</td>\n",
       "      <td>2</td>\n",
       "      <td>cash</td>\n",
       "      <td>2017-03-13 12:36:56</td>\n",
       "      <td>112.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>92</td>\n",
       "      <td>925</td>\n",
       "      <td>90</td>\n",
       "      <td>1</td>\n",
       "      <td>cash</td>\n",
       "      <td>2017-03-03 17:38:52</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>44</td>\n",
       "      <td>861</td>\n",
       "      <td>144</td>\n",
       "      <td>1</td>\n",
       "      <td>cash</td>\n",
       "      <td>2017-03-14 4:23:56</td>\n",
       "      <td>144.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>18</td>\n",
       "      <td>935</td>\n",
       "      <td>156</td>\n",
       "      <td>1</td>\n",
       "      <td>credit_card</td>\n",
       "      <td>2017-03-26 12:43:37</td>\n",
       "      <td>156.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>18</td>\n",
       "      <td>883</td>\n",
       "      <td>156</td>\n",
       "      <td>1</td>\n",
       "      <td>credit_card</td>\n",
       "      <td>2017-03-01 4:35:11</td>\n",
       "      <td>156.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   order_id  shop_id  user_id  order_amount  total_items payment_method  \\\n",
       "0         1       53      746           224            2           cash   \n",
       "1         2       92      925            90            1           cash   \n",
       "2         3       44      861           144            1           cash   \n",
       "3         4       18      935           156            1    credit_card   \n",
       "4         5       18      883           156            1    credit_card   \n",
       "\n",
       "            created_at  max_order_per_item  \n",
       "0  2017-03-13 12:36:56               112.0  \n",
       "1  2017-03-03 17:38:52                90.0  \n",
       "2   2017-03-14 4:23:56               144.0  \n",
       "3  2017-03-26 12:43:37               156.0  \n",
       "4   2017-03-01 4:35:11               156.0  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"max_order_per_item\"] = max_order_per_item\n",
    "df.head()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "governmental-venture",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>max_order_per_item</th>\n",
       "      <th>order_amount</th>\n",
       "      <th>total_items</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>25725.0</td>\n",
       "      <td>154350</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>25725.0</td>\n",
       "      <td>102900</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>273</th>\n",
       "      <td>25725.0</td>\n",
       "      <td>77175</td>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>272</th>\n",
       "      <td>25725.0</td>\n",
       "      <td>51450</td>\n",
       "      <td>2</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>271</th>\n",
       "      <td>25725.0</td>\n",
       "      <td>25725</td>\n",
       "      <td>1</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>270</th>\n",
       "      <td>352.0</td>\n",
       "      <td>704000</td>\n",
       "      <td>2000</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>269</th>\n",
       "      <td>352.0</td>\n",
       "      <td>1760</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>268</th>\n",
       "      <td>352.0</td>\n",
       "      <td>1408</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>267</th>\n",
       "      <td>352.0</td>\n",
       "      <td>1056</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>266</th>\n",
       "      <td>352.0</td>\n",
       "      <td>704</td>\n",
       "      <td>2</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     max_order_per_item  order_amount  total_items  count\n",
       "275             25725.0        154350            6      1\n",
       "274             25725.0        102900            4      1\n",
       "273             25725.0         77175            3      9\n",
       "272             25725.0         51450            2     16\n",
       "271             25725.0         25725            1     19\n",
       "270               352.0        704000         2000     17\n",
       "269               352.0          1760            5      1\n",
       "268               352.0          1408            4      2\n",
       "267               352.0          1056            3      3\n",
       "266               352.0           704            2     13"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "order_per_item = df.groupby([\"max_order_per_item\", \"order_amount\", \"total_items\"]).size(\n",
    "            ).reset_index(name=\"count\").sort_values(by=\"max_order_per_item\", ascending=False)\n",
    "order_per_item.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "intelligent-missouri",
   "metadata": {},
   "source": [
    "From the table above it is obvious that the first row is extremely large for a pair of sneakers, therefore we can attribute that to be an exception(luxury) or a mistake during data entry.\n",
    "\n",
    "Next, I'll remove the outliers using the interquarter range method to see the effects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "italian-tractor",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "order_id              2499.5\n",
      "shop_id                 51.0\n",
      "user_id                150.0\n",
      "order_amount           227.0\n",
      "total_items              2.0\n",
      "max_order_per_item      36.0\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "q1 = df.quantile(0.25)\n",
    "q3 = df.quantile(0.75)\n",
    "iqr = q3-q1\n",
    "print(iqr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "colonial-edmonton",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>order_amount</th>\n",
       "      <th>total_items</th>\n",
       "      <th>payment_method</th>\n",
       "      <th>created_at</th>\n",
       "      <th>max_order_per_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>53</td>\n",
       "      <td>746</td>\n",
       "      <td>224</td>\n",
       "      <td>2</td>\n",
       "      <td>cash</td>\n",
       "      <td>2017-03-13 12:36:56</td>\n",
       "      <td>112.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>92</td>\n",
       "      <td>925</td>\n",
       "      <td>90</td>\n",
       "      <td>1</td>\n",
       "      <td>cash</td>\n",
       "      <td>2017-03-03 17:38:52</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>44</td>\n",
       "      <td>861</td>\n",
       "      <td>144</td>\n",
       "      <td>1</td>\n",
       "      <td>cash</td>\n",
       "      <td>2017-03-14 4:23:56</td>\n",
       "      <td>144.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>18</td>\n",
       "      <td>935</td>\n",
       "      <td>156</td>\n",
       "      <td>1</td>\n",
       "      <td>credit_card</td>\n",
       "      <td>2017-03-26 12:43:37</td>\n",
       "      <td>156.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>18</td>\n",
       "      <td>883</td>\n",
       "      <td>156</td>\n",
       "      <td>1</td>\n",
       "      <td>credit_card</td>\n",
       "      <td>2017-03-01 4:35:11</td>\n",
       "      <td>156.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   order_id  shop_id  user_id  order_amount  total_items payment_method  \\\n",
       "0         1       53      746           224            2           cash   \n",
       "1         2       92      925            90            1           cash   \n",
       "2         3       44      861           144            1           cash   \n",
       "3         4       18      935           156            1    credit_card   \n",
       "4         5       18      883           156            1    credit_card   \n",
       "\n",
       "            created_at  max_order_per_item  \n",
       "0  2017-03-13 12:36:56               112.0  \n",
       "1  2017-03-03 17:38:52                90.0  \n",
       "2   2017-03-14 4:23:56               144.0  \n",
       "3  2017-03-26 12:43:37               156.0  \n",
       "4   2017-03-01 4:35:11               156.0  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_df = df[~((df < (q1 - (1.5 * iqr))) |\n",
    "              (df > (q3 + (1.5 * iqr)))).any(axis=1)]\n",
    "new_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "absolute-surrey",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    4831.000000\n",
       "mean      292.430346\n",
       "std       143.256683\n",
       "min        90.000000\n",
       "25%       162.000000\n",
       "50%       280.000000\n",
       "75%       380.000000\n",
       "max       730.000000\n",
       "Name: order_amount, dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_df[\"order_amount\"].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "alert-oliver",
   "metadata": {},
   "source": [
    "After removing the outliers it can be noticed that the value of the mean dropped to a more representative value of the entire dataset with other statistics still within range.\n",
    "\n",
    "Let's make a boxplot to show it in details"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "russian-burlington",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEJCAYAAABv6GdPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAchElEQVR4nO3de5hdVX3/8fdHEiAETIDYKSaRQYmIpiXgVDHeRqJWQBL0UYpSCTQ2taVeqv403n4/bK3GyyNI66WpUQK1EUTTRIIUGuaIikSJQCCATSCJk5AQAySSRJSE7++PvQZ2xjNzzpmZM5eVz+t5zjN7r732Xmtfzmf2WefMGUUEZmaWl2cMdQfMzGzgOdzNzDLkcDczy5DD3cwsQw53M7MMOdzNzDJ0QIW7pIqkdw5ymxdL+o9BaGeDpNc2cfvPkbRL0kHNaqNbe03bn/J1IOk8STcM4LbXSGpP0wN67iV9VNLXB2p7DbT7Jkmd6fyf3ITtD/rz8kCQRbini+NRSYcMdV/6o9kB3R8R8auIODwi9g30tiVdLulTA73dekTEtyLi9bXq1dvHiHhRRFT62y9J7ZI2ddv2pyNiKELwC8Dfp/N/+xC0nx1JF0j6cTPbGPHhLqkVeCUQwMyh7Y0dqCSNGuo+NNGxwJr+bkSFAcuczI95v434cAfOB24FLgdm11H/eZJ+Juk3kpZKOqprgaSZ6WX1jvRq4MRU/mFJK7suJkl/m+odKqlVUkiaK+lBSVskfbCnxntp40rgOcD308vfD1VZd4Kka9O6j0j6UbcnyzRJqyXtlHSVpENL6/61pHVpvWWSnp3KPynpX9L0aEm7JX0+zY+R9Liko0r72XUMKpL+SdJPJD0m6QZJE0rtnS9po6SHJX2ip1clkuYC5wEfSvv9/Tr3542S7kjH4hZJf9rLMX+dpPvSdv4VUGnZU3dQKXwukbQtXR93SZraUx/TPn1Y0mpgt6RRVfbz0NT3xyT9QtJJpbZD0vGl+cslfUrSWOAHwLNTe7skPVvdhnl6upZKfftgT8ev2/F5hqSPp/O1TdIVksZJOkTSLuAg4E5J9/ew/nRJP0/t/FzS9NKyiqR/lvQTYA/w3N7OR1rnryTdq+LV+H9LOrbbMbtI0lpgbQ/9+Y6krWn7N0t6Ubdj/BVJP0jH9SeS/ljSpam9+1QaepJ0YtqHHelYzywt2284Sd3uxlNf3yVpbVr/y+kaOxH4GvCy1Icd1faj3yJiRD+AdcDfAS8GngBaeqlbATYDU4GxwHeB/0jLng/sBl4HjAY+lLZ9MMUvwZuBi4EpwKPAyWm9VopXDYvTNv8E+DXw2rT84nraSMs3dK3XQ/8/Q3FRjE6PVwIqrfsz4NnAUcC9wLvSstOA7cApwCHAvwA3l5bdlaanA/cDK0vL7uy2n6NKx/L+tE9j0vz8tOyFwC7gFen4fSGdm6r7RvGL+VPdynrbn5OBbcBLKYJndqp/SJVtTwAeA96Sjtk/AHuBd6blFwA/TtN/DqwCxlMEzonAMTX6eAcwGRjT/Rymc/9Eqe0PAuuB0Wl5AMdXOw5AO7CpW3sX09i1VPX4VTlGf5XWfS5wOPA94MrS8v362W3doyieD+8ARgFvS/NHl66TXwEvSsufVeN8zEp9OTHV/zhwS7e+3JjaHdPL/hxBca1fCtzR7Rhvp8iLQ4Gb0jk5n+Ja+hTQkeqOTn35KMV1fFrq+wmlfXtnadsXkK6lUl+vpbienkORC2+oVrcZjxF95y7pFRQvGa+OiFUUYfP2GqtdGRF3R8Ru4BPAOSreJPwLYHlE3BgRT1AE0hhgekQ8SXHy3wMsAz4Xfzj2+MmI2B0RdwHfpLjIu+uxjTp3+QngGODYiHgiIn4U6UpJLouIByPiEeD7wLRUfh7wjYj4RUT8DvgIxV1DK/BTYIqko4FXAQuBiZIOB14N/LCX/nwzIv43In4LXF1q7y3A9yPixxHxe+D/Ulzojeppf+YC/xYRKyNiX0QsAn4HnFplG2cAayLimnTMLwW29tDeExSh8AKKX5r3RsSWOvrYmY5BNatKbX+RIlCq9bNR9VxLPR2/7s4DvhgRD0TELorr41zVN+xxJrA2Iq6MiL0RsRi4DzirVOfyiFgTEXuB0+n9fLwL+Ew69nuBT1O8gju2VOczEfFIT8c8Ir4REY+la/1i4CRJ40pVlkTEqoh4HFgCPB4RV0TxftJVFDcPUJynwyluWn4fETdRhHW153ZP5kfEjoj4FdBBz+dgwI3ocKe4Y7shIran+f+k9tBMZ2l6I8Vv5wkUdzgbuxakQO8EJqb5DRQnpxX4ch3bfXaVOr22UYfPU9xJ3CDpAUnzui0vP0n2UFyY1drdBTwMTExPkNsogvxVFGF+C/Byaod7b+09dTwiYk9qr1E9bf9Y4APppe6O9LJ2Mj0f83Jfgv3PFaVlNwH/SnF+t0laIOmZNfpYdVvVlqfzvamHfjaqnmupp+PX67bS9CigpdF+lNYv96N8jGqdj2OBL5XO6yMUr6J62t5+JB0kab6k+yX9huIVDBTP8S4PlaZ/W2V+v+s4Hdue9q2Wes/BgBux4S5pDHAO8Oo0vraV4iXeSeVxzSoml6afQ3G3th14kOLC6tq+Ut3Naf5M4GXACoqQrbXdB6vU6bUNatzdpruRD0TEcynePH6/pBm9rdNDu2OBo0vt/pDiJefJwM/T/J8DL6EYjmrUFmBSqb0xqb2eNHpX3wn8c0SMLz0OS3eN1fry1LkpHfPqHYm4LCJeTDG09Hzg/9ToY62+l9t+BsVx6bo29gCHler+cQPbrXUtNWK/bVFcv3vZP/TqXbdr/XI/yvtS63x0An/T7dyOiYhbethed2+nGNp5LTCO4mYMuo3r1+lBYLL2f1+rvG+76fn81dL0r+MdseEOnA3so3gSTkuPE4EfUQyh9OQvJb1Q0mHAPwLXpJdjVwNnSpohaTTwAYqX+reoeKPw68A7KV4ZnCXpjG7b/YSkw9KbNxdSvLzrrsc20vKHKMY9q1LxJuLx6QmxM+3/kz3VL1kMXChpmoqPi36aYlx9Q1r+Q4pjdk8aRqmkfV0fEb+uY/vdXUNxjKZLOpjipXFvT65e97uKfwfeJeml6Q2qsZLOlHRElbrLgRdJenMaZngPPTwJJf1Z2uZoiifu4zx9fBvtY5cXl9p+H8X5vjUtuwN4e7rbfAPFK6UuDwFHdxtOKKt1LTViMfAPko5Lw3GfBq5KwyK1XAc8X9LbVbyh/BcUz8lre6hf63x8DfhI15ugKt7YfWsD+3IExXF4mCJ4P93Aut2tpPgF/CEVHzZopxhu+nZafgfw5vS8Px6Y08C2HwImpedHU4zkcJ9NMeb7q4jY2vWgeFl9Xi/jhVdSvKmylWL88z0AEfFL4C8p3mzcTnESz0phtwBYGhHXRcTDFCfx62mcussPKYZMVgBfiIg/+MOYGm1A8Ybpx9NL0mqfuJkC/A/Fm5U/Bb4SER21DlRE/A/F+wvfpbhzeh5wbqnKLRTjtV136fdQBFtf7tqJiDXAuymeBFtSf7dRPOmqWQi8MO33f9Wx/duAv6Y4149SHPcLeqi7HXgrMJ/iCT8F+EkPm34mxS+ORylefj/M06/SGupjyVKK8fGuNx3fnMaaAd5LcQ3soBj3fmq7EXEfReg+kNrcbyinjmupEd+geF7cTPHm4uMU56+m9Hx4I8Uvl4cp3th9Y2motHv9Xs9HRCwBPgt8Ow2r3E0xTl+vKyjO3WaK6/jW3qv3LB3Ls1L724GvAOencwNwCfB7iqBeBHyrgc3fRPHx0q2Sqh6r/ur6pIX1UXpTsusTEPXc6Rxw0t3gDmBKRKwf4u6YHRBG8p27DWOSzkovV8dSfJLjLp5+c8vMmszhbs0yi+INqQcpXnqfG36ZaDZoPCxjZpYh37mbmWVoWHzxzoQJE6K1tXWou2H2B3bv3s3YsWOHuhtmVa1atWp7RDyr2rJhEe6tra3cdtttQ90Nsz9QqVRob28f6m6YVSWp+18HP8XDMmZmGXK4m5llyOFuZpYhh7uZWYYc7mZmGXK4m1WxePFipk6dyowZM5g6dSqLF1f7NmGz4WtYfBTSbDhZvHgxH/vYx1i4cCH79u3joIMOYs6c4ttc3/a2Rv4Jj9nQGRZfP9DW1hb+nLsNF1OnTmXPnj2sX//0F1ged9xxHHbYYdx9991D2DOz/UlaFRFt1ZZ5WMasmzVr1rB+/XpmzpzJkiVLmDlzJuvXr2fNmjVD3TWzujnczaqYPn06S5cuZfz48SxdupTp0+v9H+Zmw4PH3M2q6OzspKOjg3379tHR0UFnZ63/g202vDjczaro7OzktNNOG+pumPWZh2XMzDLkcDczy1DNcJd0gqQ7So/fSHqfpKMk3Shpbfp5ZKovSZdJWidptaRTmr8bZgMvIujo6GA4fFzYrFE1wz0ifhkR0yJiGvBiYA+wBJgHrIiIKcCKNA9wOsX/zJwCzAW+2oR+m5lZLxp9Q3UGcH9EbJQ0C2hP5YuACvBhin+MfEX6Z8i3Shov6ZiI2DJAfTbrM0n9qlvv+r7bt6HWaLifC3R9yUZLKbC3Ai1peiJQ/tzYplTmcLchV0/o9hbgDm0bKeoOd0kHAzOBj3RfFhEhqaGrXtJcimEbWlpaqFQqjaxu1jQdHR285jWvqVru69RGikbu3E8HfhERD6X5h7qGWyQdA2xL5ZuByaX1JqWy/UTEAmABFN8t4/9TacNJ1x1667zlbJh/5hD3xqxxjXwU8m08PSQDsAyYnaZnA0tL5eenT82cCuz0eLuZ2eCq685d0ljgdcDflIrnA1dLmgNsBM5J5dcBZwDrKD5Zc+GA9dbMzOpSV7hHxG7g6G5lD1N8eqZ73QAuGpDemZlZn/gvVM3MMuRwNzPLkMPdzCxDDnczsww53M3MMuRwNzPLkMPdzCxDDnczsww53M3MMuRwNzPLkMPdzCxDDnczsww53M3MMuRwNzPLkMPdzCxDDnczsww53M3MMuRwNzPLkMPdzCxDDnczsww53M3MMuRwNzPLUF3hLmm8pGsk3SfpXkkvk3SUpBslrU0/j0x1JekySeskrZZ0SnN3wczMuqv3zv1LwPUR8QLgJOBeYB6wIiKmACvSPMDpwJT0mAt8dUB7bGZmNdUMd0njgFcBCwEi4vcRsQOYBSxK1RYBZ6fpWcAVUbgVGC/pmAHut5mZ9WJUHXWOA34NfFPSScAq4L1AS0RsSXW2Ai1peiLQWVp/UyrbUipD0lyKO3taWlqoVCp93AWz5vK1aSNRPeE+CjgFeHdErJT0JZ4eggEgIkJSNNJwRCwAFgC0tbVFe3t7I6ubDY7rl+Nr00aiesbcNwGbImJlmr+GIuwf6hpuST+3peWbgcml9SelMjMzGyQ1wz0itgKdkk5IRTOAe4BlwOxUNhtYmqaXAeenT82cCuwsDd+YmdkgqGdYBuDdwLckHQw8AFxI8YvhaklzgI3AOanudcAZwDpgT6prZmaDqK5wj4g7gLYqi2ZUqRvARf3rlpmZ9Yf/QtXMLEMOdzOzDDnczcwy5HA3M8uQw93MLEMOdzOzDDnczcwy5HA3M8uQw93MLEMOdzOzDDnczcwy5HA3M8uQw93MLEMOdzOzDDnczcwy5HA3M8uQw93MLEMOdzOzDDnczcwy5HA3M8uQw93MLEMOdzOzDNUV7pI2SLpL0h2SbktlR0m6UdLa9PPIVC5Jl0laJ2m1pFOauQNmZvaHGrlzf01ETIuItjQ/D1gREVOAFWke4HRgSnrMBb46UJ01M7P69GdYZhawKE0vAs4ulV8RhVuB8ZKO6Uc7ZmbWoFF11gvgBkkB/FtELABaImJLWr4VaEnTE4HO0rqbUtmWUhmS5lLc2dPS0kKlUunTDpg1m69NG4nqDfdXRMRmSX8E3CjpvvLCiIgU/HVLvyAWALS1tUV7e3sjq5sNjuuX42vTRqK6hmUiYnP6uQ1YArwEeKhruCX93JaqbwYml1aflMrMzGyQ1Ax3SWMlHdE1DbweuBtYBsxO1WYDS9P0MuD89KmZU4GdpeEbMzMbBPUMy7QASyR11f/PiLhe0s+BqyXNATYC56T61wFnAOuAPcCFA95rMzPrVc1wj4gHgJOqlD8MzKhSHsBFA9I7MzPrE/+FqplZhhzuZmYZcribmWXI4W5mliGHu5lZhhzuZmYZcribmWXI4W5mliGHu5lZhhzuZmYZcribmWXI4W5mliGHu5lZhhzuZmYZcribmWXI4W5mliGHu5lZhhzuZmYZcribmWXI4W5mliGHu5lZhhzuZmYZqjvcJR0k6XZJ16b54yStlLRO0lWSDk7lh6T5dWl5a5P6bmZmPWjkzv29wL2l+c8Cl0TE8cCjwJxUPgd4NJVfkuqZmdkgqivcJU0CzgS+nuYFnAZck6osAs5O07PSPGn5jFTfzMwGyag6610KfAg4Is0fDeyIiL1pfhMwMU1PBDoBImKvpJ2p/vbyBiXNBeYCtLS0UKlU+rYHZk3ma9NGoprhLumNwLaIWCWpfaAajogFwAKAtra2aG8fsE2bDZzrl+Nr00aieu7cXw7MlHQGcCjwTOBLwHhJo9Ld+yRgc6q/GZgMbJI0ChgHPDzgPTczsx7VHHOPiI9ExKSIaAXOBW6KiPOADuAtqdpsYGmaXpbmSctviogY0F6bmVmv+vM59w8D75e0jmJMfWEqXwgcncrfD8zrXxfNzKxR9b6hCkBEVIBKmn4AeEmVOo8Dbx2AvpmZWR81FO5mw8lJn7yBnb99ounttM5b3vQ2xo0ZzZ3/7/VNb8cOHA53G7F2/vYJNsw/s6ltVCqVQfm0zGD8ArEDi79bxswsQw53M7MMOdzNzDLkcDczy5DD3cwsQw53M7MMOdzNzDLkcDczy5DD3cwsQw53M7MMOdzNzDLkcDczy5DD3cwsQw53M7MMOdzNzDLkcDczy5DD3cwsQw53M7MMOdzNzDLkcDczy1DNcJd0qKSfSbpT0hpJn0zlx0laKWmdpKskHZzKD0nz69Ly1ibvg5mZdVPPnfvvgNMi4iRgGvAGSacCnwUuiYjjgUeBOan+HODRVH5JqmdmZoOoZrhHYVeaHZ0eAZwGXJPKFwFnp+lZaZ60fIYkDVSHzcystlH1VJJ0ELAKOB74MnA/sCMi9qYqm4CJaXoi0AkQEXsl7QSOBrZ32+ZcYC5AS0sLlUqlXztiB6ZmXze7du0atGvTzwEbSHWFe0TsA6ZJGg8sAV7Q34YjYgGwAKCtrS3a29v7u0k70Fy/nGZfN5VKpeltAIOyL3ZgaejTMhGxA+gAXgaMl9T1y2ESsDlNbwYmA6Tl44CHB6KzZmZWn3o+LfOsdMeOpDHA64B7KUL+LanabGBpml6W5knLb4qIGMA+m5lZDfUMyxwDLErj7s8Aro6IayXdA3xb0qeA24GFqf5C4EpJ64BHgHOb0G8zM+tFzXCPiNXAyVXKHwBeUqX8ceCtA9I7MzPrk7reUDUbjo44cR5/smhe8xtaVLtKfx1xIsCZzW/IDhgOdxuxHrt3PhvmNzcQB+vTMq3zlje9DTuw+LtlzMwy5HA3M8uQw93MLEMOdzOzDDnczcwy5HA3M8uQw93MLEMOdzOzDDnczcwy5HA3M8uQw93MLEMOdzOzDDnczcwy5HA3M8uQw93MLEMOdzOzDDnczcwy5HA3M8uQw93MLEMOdzOzDNUMd0mTJXVIukfSGknvTeVHSbpR0tr088hULkmXSVonabWkU5q9E2Zmtr967tz3Ah+IiBcCpwIXSXohMA9YERFTgBVpHuB0YEp6zAW+OuC9NjOzXtUM94jYEhG/SNOPAfcCE4FZwKJUbRFwdpqeBVwRhVuB8ZKOGeiOm5lZz0Y1UllSK3AysBJoiYgtadFWoCVNTwQ6S6ttSmVbSmVImktxZ09LSwuVSqXBrpvR9Otm165dg3Zt+jlgA6nucJd0OPBd4H0R8RtJTy2LiJAUjTQcEQuABQBtbW3R3t7eyOpmcP1ymn3dVCqVprcBDMq+2IGlrk/LSBpNEezfiojvpeKHuoZb0s9tqXwzMLm0+qRUZmZmg6SeT8sIWAjcGxFfLC1aBsxO07OBpaXy89OnZk4FdpaGb8zMbBDUMyzzcuAdwF2S7khlHwXmA1dLmgNsBM5Jy64DzgDWAXuACweyw2ZmVlvNcI+IHwPqYfGMKvUDuKif/TIzs37wX6iamWXI4W5mliGHu5lZhhzuZmYZcribmWXI4W5mlqGGvlvGbLhpnbe8+Y1c3/w2xo0Z3fQ27MDicLcRa8P8M5veRuu85YPSjtlA87CMmVmGHO5mZhlyuJuZZcjhbmaWIYe7mVmGHO5mZhlyuJuZZcjhbmaWIYe7mVmGHO5mZhlyuJuZZcjhbmaWIYe7mVmGHO5mZhmqGe6SviFpm6S7S2VHSbpR0tr088hULkmXSVonabWkU5rZeTMzq66eO/fLgTd0K5sHrIiIKcCKNA9wOjAlPeYCXx2YbpqZWSNqhntE3Aw80q14FrAoTS8Czi6VXxGFW4Hxko4ZoL6amVmd+vqfmFoiYkua3gq0pOmJQGep3qZUtoVuJM2luLunpaWFSqXSx66YNZevTRuJ+v1v9iIiJEUf1lsALABoa2uL9vb2/nbFbOBdvxxfmzYS9fXTMg91Dbekn9tS+WZgcqnepFRmZmaDqK/hvgyYnaZnA0tL5eenT82cCuwsDd+YDTlJDT02fvaNDa8jaah306yuj0IuBn4KnCBpk6Q5wHzgdZLWAq9N8wDXAQ8A64B/B/6uKb0266OIaOjR0dHR8DoRDY9Smg24mmPuEfG2HhbNqFI3gIv62ykzM+sf/4WqmVmGHO5mZhlyuJuZZcjhbmaWIYe7mVmGHO5mZhlyuJuZZUjD4Q8uJP0a2DjU/TCrYgKwfag7YdaDYyPiWdUWDItwNxuuJN0WEW1D3Q+zRnlYxswsQw53M7MMOdzNerdgqDtg1hceczczy5Dv3M3MMuRwNzPLkMPdzCxDDnfLhqSLJX1wqPsxkCR9dKj7YCOTw91GpPR/evt1/Uqq+Z/IhgGHu/WJw92GLUnvl3R3erxPUqukX0q6ArgbmCzpY5L+V9KPgRNK6z5P0vWSVkn6kaQXpPLLJX1N0krgcz20+xJJP5V0u6RbJJ2Qyi+Q9F+SbpS0QdLfpz7eLulWSUeletPS/GpJSyQdmcorktrS9ARJG0rb/V7q71pJn0vl84Exku6Q9K3mHGXLVl/++a8ffjT7AbwYuAsYCxwOrAFOBp4ETu1W5zDgmRT/mP2DadkKYEqafilwU5q+HLgWOKiXtp8JjErTrwW+m6YvSG0cATwL2Am8Ky27BHhfml4NvDpN/yNwaZquAG1pegKwobTdB4BxwKEU37M0OS3bNdTnwo+R+RgJL0vtwPQKYElE7AaQ9D3glcDGiLg11XllqrMn1VmWfh4OTAe+I6lre4eUtv2diNjXS9vjgEWSpgABjC4t64iIx4DHJO0Evp/K7wL+VNI4YHxE/DCVLwK+U8f+roiInan/9wDHAp11rGdWlcPdRprdddR5BrAjIqb1cRv/RBHib5LUSnHH3eV3peknS/NPUvv5tJenh0IP7basvN19dWzLrFcec7fh6kfA2ZIOkzQWeFMqK7s51Rkj6QjgLICI+A2wXtJb4ak3X09qoO1xwOY0fUEjnU53349KemUqegfQdRe/gWIoCeAtdW7yCUmja1cz25/D3YaliPgFxfj4z4CVwNeBR6vUuQq4E/gB8PPS4vOAOZLupBivn9VA858DPiPpdvp2Bz0b+Lyk1cA0inF3gC8Af5u2O6HObS0AVvsNVWuUv1vGzCxDvnM3M8uQ37SxA5akC4H3div+SURcNBT9MRtIHpYxM8uQh2XMzDLkcDczy5DD3cwsQw53M7MM/X8DHLEwvfQCtQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "new_df.boxplot(column=\"order_amount\")\n",
    "plt.title(\"A boxplot showing the distribution of order amount\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "toxic-madonna",
   "metadata": {},
   "source": [
    "Although there are visible outliers can be fixed but there is improvement in the appearence of the boxplot."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "equal-beaver",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>order_amount</th>\n",
       "      <th>total_items</th>\n",
       "      <th>payment_method</th>\n",
       "      <th>created_at</th>\n",
       "      <th>max_order_per_item</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>42</td>\n",
       "      <td>607</td>\n",
       "      <td>704000</td>\n",
       "      <td>2000</td>\n",
       "      <td>credit_card</td>\n",
       "      <td>2017-03-07 4:00:00</td>\n",
       "      <td>352.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>41</td>\n",
       "      <td>42</td>\n",
       "      <td>793</td>\n",
       "      <td>352</td>\n",
       "      <td>1</td>\n",
       "      <td>credit_card</td>\n",
       "      <td>2017-03-24 14:15:41</td>\n",
       "      <td>352.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>61</td>\n",
       "      <td>42</td>\n",
       "      <td>607</td>\n",
       "      <td>704000</td>\n",
       "      <td>2000</td>\n",
       "      <td>credit_card</td>\n",
       "      <td>2017-03-04 4:00:00</td>\n",
       "      <td>352.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>100</td>\n",
       "      <td>18</td>\n",
       "      <td>752</td>\n",
       "      <td>780</td>\n",
       "      <td>5</td>\n",
       "      <td>cash</td>\n",
       "      <td>2017-03-06 23:41:16</td>\n",
       "      <td>156.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>137</td>\n",
       "      <td>15</td>\n",
       "      <td>961</td>\n",
       "      <td>765</td>\n",
       "      <td>5</td>\n",
       "      <td>credit_card</td>\n",
       "      <td>2017-03-26 5:06:46</td>\n",
       "      <td>153.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     order_id  shop_id  user_id  order_amount  total_items payment_method  \\\n",
       "15         16       42      607        704000         2000    credit_card   \n",
       "40         41       42      793           352            1    credit_card   \n",
       "60         61       42      607        704000         2000    credit_card   \n",
       "99        100       18      752           780            5           cash   \n",
       "136       137       15      961           765            5    credit_card   \n",
       "\n",
       "              created_at  max_order_per_item  \n",
       "15    2017-03-07 4:00:00               352.0  \n",
       "40   2017-03-24 14:15:41               352.0  \n",
       "60    2017-03-04 4:00:00               352.0  \n",
       "99   2017-03-06 23:41:16               156.0  \n",
       "136   2017-03-26 5:06:46               153.0  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_out = df[((df < (q1 - 1.5 * iqr)) |(df > (q3 + 1.5 * iqr))).any(axis=1)]\n",
    "df_out.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "arranged-republican",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The rows left after removing outliers are: 4831\n"
     ]
    }
   ],
   "source": [
    "print(\"The rows left after removing outliers are:\", new_df.shape[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "psychological-keeping",
   "metadata": {},
   "source": [
    "### Question one (b)\n",
    "\n",
    "What metric would you report for this dataset?"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "enabling-audience",
   "metadata": {},
   "source": [
    "Looking at the analysis done earlier it is obvious that that the mean was affected by extreme values so it will be preferrable to use a measure of dispersion that isn't affected by extreme values. Therefore, I'll be using the median value."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "breathing-flush",
   "metadata": {},
   "source": [
    "### Question one (c) \n",
    "\n",
    "What is its value?"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "corrected-judgment",
   "metadata": {},
   "source": [
    "The median value of the original data is 284."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "rural-atlantic",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count      5000.000000\n",
       "mean       3145.128000\n",
       "std       41282.539349\n",
       "min          90.000000\n",
       "25%         163.000000\n",
       "50%         284.000000\n",
       "75%         390.000000\n",
       "max      704000.000000\n",
       "Name: order_amount, dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"order_amount\"].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "advisory-emerald",
   "metadata": {},
   "source": [
    " while the median value for the truncated data is 280."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "honest-sunrise",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    4831.000000\n",
       "mean      292.430346\n",
       "std       143.256683\n",
       "min        90.000000\n",
       "25%       162.000000\n",
       "50%       280.000000\n",
       "75%       380.000000\n",
       "max       730.000000\n",
       "Name: order_amount, dtype: float64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_df[\"order_amount\"].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "alike-genesis",
   "metadata": {},
   "source": [
    "### Question two (a) SQL\n",
    "\n",
    "How many orders were shipped by Speedy Express in total?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "unknown-services",
   "metadata": {},
   "outputs": [],
   "source": [
    "SELECT count(*) \n",
    "FROM orders, shippers\n",
    "where orders.shipperid = shippers.shipperid \n",
    "and shippers.shippername = \"Speedy Express\";"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "colored-ontario",
   "metadata": {},
   "source": [
    "Output: 54"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "reverse-burden",
   "metadata": {},
   "source": [
    "### Question two (b)\n",
    "\n",
    "What is the last name of the employee with the most orders?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "apparent-contemporary",
   "metadata": {},
   "outputs": [],
   "source": [
    "SELECT employees.lastname, count(*) as count\n",
    "from orders, employees\n",
    "where orders.employeeid = employees.employeeid\n",
    "group by orders.employeeid\n",
    "order by count DESC"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "loved-interference",
   "metadata": {},
   "source": [
    "Output: Peacock with 40 orders"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "latin-green",
   "metadata": {},
   "source": [
    "### Question two (c)\n",
    "\n",
    "What product was ordered the most by customers in Germany?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fifty-conducting",
   "metadata": {},
   "outputs": [],
   "source": [
    "SELECT p.ProductID, p.ProductName, SUM(Quantity) AS TotalQuantity \n",
    "FROM Orders AS o, OrderDetails AS od, Customers AS c, Products AS p \n",
    "WHERE c.Country = \"Germany\" AND od.OrderID = o.OrderID \n",
    "AND od.ProductID = p.ProductID AND c.CustomerID = o.CustomerID \n",
    "GROUP BY p.ProductID \n",
    "ORDER BY TotalQuantity DESC \n",
    "LIMIT 1;"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "norwegian-milwaukee",
   "metadata": {},
   "source": [
    "Output: Boston crab meat with 160 quantity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "successful-absence",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}