{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Generating a data set" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "***" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Dog breeds" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "noobs = 100" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array([ True, True, True, False, False, True, False, True, True,\n", " False, False, True, False, False, True, True, True, False,\n", " False, False, False, False, True, False, False, False, False,\n", " True, False, False, True, False, False, True, True, False,\n", " True, False, True, True, False, False, True, True, False,\n", " True, False, False, True, True, False, False, False, False,\n", " False, True, True, True, False, True, False, True, False,\n", " False, False, True, True, False, True, True, True, False,\n", " False, False, False, True, False, False, True, True, True,\n", " True, False, True, True, False, True, False, True, True,\n", " False, False, True, True, False, False, False, False, False,\n", " True])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pure = np.array([True if np.random.random() < 0.5 else False for i in range(noobs)])\n", "pure" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3, 7, 6, 3, 3, 5, 8, 3, 4, 5, 5, 6, 3, 3, 1, 4, 3, 8, 7, 2, 5, 3,\n", " 6, 5, 3, 4, 4, 8, 2, 5, 4, 2, 9, 2, 3, 2, 2, 3, 1, 4, 3, 0, 8, 8,\n", " 4, 7, 3, 3, 5, 5, 3, 2, 2, 6, 6, 6, 5, 2, 8, 4, 5, 5, 4, 5, 7, 4,\n", " 0, 5, 1, 2, 3, 5, 2, 0, 3, 0, 2, 3, 6, 4, 6, 6, 3, 4, 5, 6, 4, 2,\n", " 3, 6, 7, 6, 1, 6, 4, 4, 4, 6, 3, 7])" ] }, "execution_count": 4, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "age = np.around(np.random.normal(4, 2, noobs)).astype(np.int)\n", "age[age <= 0] = 0\n", "age" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 7.94, 5.05, 16.15, 15.5 , 12.17, 14.11, 9.51, 15.6 , 12.36,\n", " 19.18, 15.98, 4.85, 19.52, 11.21, 14.08, 14.96, 6.84, 15.76,\n", " 17.78, 21.49, 16.26, 17.82, 7.9 , 18.26, 18.56, 11.95, 11.09,\n", " 15.57, 6.97, 19.78, 18.58, 19.68, 16.92, 3.19, 21.78, 13.49,\n", " 7.29, 11.74, 17.27, 13.89, 15.38, 12.44, 17.66, 12.55, 11.32,\n", " 15.95, 10.89, 22.06, 8.11, 12.89, 14.57, 15.37, 20.71, 16.72,\n", " 20.06, 16.1 , 16.87, 9.41, 19.5 , 16.18, 18.06, 15.5 , 14.61,\n", " 4.82, 10.92, 9.79, 21.48, 16.14, 15.26, 16.36, 15.49, 8.84,\n", " 13.85, 17.1 , 16.12, 3.67, 14.98, 8.28, 16.25, 21.81, 21.88,\n", " 14.21, 5.05, 19.55, 6.6 , 13.39, 20.9 , 8.02, 19.38, 16.52,\n", " 9.02, 13.38, 21.5 , 10.55, 11. , 20.76, 17.4 , 15.6 , 8.37,\n", " 9.09])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "weight = np.random.normal(15, 5, noobs)\n", "weight = np.absolute(weight)\n", "weight = np.around(weight, 2)\n", "weight" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([49.6, 63.5, 68.2, 64.7, 59.7, 26.9, 49.2, 49.9, 48.1, 54.2, 57.2,\n", " 56.7, 43. , 57.2, 47.7, 59.3, 42.5, 46.8, 39.8, 59.5, 42.8, 45.1,\n", " 44.1, 50.4, 61. , 54.9, 46.1, 48. , 40.1, 49. , 48.2, 50.3, 39.9,\n", " 47.9, 41.6, 41. , 55.8, 45.2, 55.2, 47.1, 47.6, 55.9, 37.2, 58.6,\n", " 48.3, 56. , 47.6, 57. , 34.4, 36.5, 68.4, 39.1, 45.8, 48.3, 61.3,\n", " 53.4, 51.3, 48.6, 51.9, 36.9, 41.9, 66.8, 55.5, 44.9, 43.2, 52.3,\n", " 17.2, 55.8, 63.5, 55.2, 41.9, 53.4, 36. , 60.3, 28.4, 46.9, 46.7,\n", " 41.3, 44.6, 52. , 43. , 63.4, 55.7, 41. , 48.1, 65.1, 52. 
, 48.2,\n", " 63.5, 51.5, 58.3, 63.3, 55.7, 44.6, 43.7, 52.2, 47.5, 51.4, 54.1,\n", " 50.3])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Heights: normal draws (mean 50, std 10); np.absolute mirrors any negative draw\n", "# to a positive value before rounding to one decimal.\n", "height = np.random.normal(50, 10, noobs)\n", "height = np.absolute(height)\n", "height = np.around(height, 1)\n", "height" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$ l = 5 + 1.2\\,(1 - p) + \\frac{|40.0 - w|^2}{100} $$\n", "\n", "Here $p$ is 1 when `pure` is True and 0 otherwise, and $w$ is the weight; the height $h$ is not used by the code below, which also adds a little Gaussian noise to the constant term and rounds the result to an integer." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([15, 17, 11, 12, 14, 12, 15, 11, 13, 10, 12, 17, 10, 14, 12, 11, 16,\n", " 12, 11, 10, 12, 11, 15, 11, 11, 14, 15, 11, 17, 10, 10, 11, 11, 19,\n", " 8, 13, 16, 14, 10, 12, 12, 14, 10, 13, 14, 11, 15, 10, 15, 12, 13,\n", " 12, 10, 11, 10, 11, 10, 14, 10, 11, 11, 11, 13, 19, 14, 14, 8, 12,\n", " 11, 11, 11, 16, 13, 12, 12, 18, 12, 16, 11, 8, 8, 12, 18, 9, 16,\n", " 13, 9, 16, 9, 11, 16, 13, 8, 14, 15, 10, 11, 12, 16, 14])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Lifespan = noisy baseline around 5, plus 1.2 where `pure` is False, plus the squared\n", "# distance of the weight from 40 divided by 100; the sum is rounded to an integer.\n", "# The builtin int replaces np.int, an alias deprecated in NumPy 1.20 and later removed.\n", "lifespan = np.around(\n", "    np.random.normal(5, 0.1, noobs)\n", "    + (~pure).astype(int) * 1.2\n", "    + np.absolute(40.0 - weight) ** 2 / 100.0\n", ").astype(int)\n", "lifespan" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | pure | \n", "age | \n", "weight | \n", "height | \n", "lifespan | \n", "
---|---|---|---|---|---|
0 | \n", "True | \n", "3 | \n", "7.94 | \n", "49.6 | \n", "15 | \n", "
1 | \n", "True | \n", "7 | \n", "5.05 | \n", "63.5 | \n", "17 | \n", "
2 | \n", "True | \n", "6 | \n", "16.15 | \n", "68.2 | \n", "11 | \n", "
3 | \n", "False | \n", "3 | \n", "15.50 | \n", "64.7 | \n", "12 | \n", "
4 | \n", "False | \n", "3 | \n", "12.17 | \n", "59.7 | \n", "14 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
95 | \n", "False | \n", "4 | \n", "20.76 | \n", "52.2 | \n", "10 | \n", "
96 | \n", "False | \n", "4 | \n", "17.40 | \n", "47.5 | \n", "11 | \n", "
97 | \n", "False | \n", "6 | \n", "15.60 | \n", "51.4 | \n", "12 | \n", "
98 | \n", "False | \n", "3 | \n", "8.37 | \n", "54.1 | \n", "16 | \n", "
99 | \n", "True | \n", "7 | \n", "9.09 | \n", "50.3 | \n", "14 | \n", "
100 rows × 5 columns
\n", "