{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Generating a data set" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "***" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Dog breeds" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "noobs = 100" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array([ True, True, True, False, False, True, False, True, True,\n", " False, False, True, False, False, True, True, True, False,\n", " False, False, False, False, True, False, False, False, False,\n", " True, False, False, True, False, False, True, True, False,\n", " True, False, True, True, False, False, True, True, False,\n", " True, False, False, True, True, False, False, False, False,\n", " False, True, True, True, False, True, False, True, False,\n", " False, False, True, True, False, True, True, True, False,\n", " False, False, False, True, False, False, True, True, True,\n", " True, False, True, True, False, True, False, True, True,\n", " False, False, True, True, False, False, False, False, False,\n", " True])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One uniform draw per observation; True where the draw falls below 0.5.\n", "# A single vectorised comparison consumes the same random stream as a per-element loop.\n", "pure = np.random.random(noobs) < 0.5\n", "pure" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3, 7, 6, 3, 3, 5, 8, 3, 4, 5, 5, 6, 3, 3, 1, 4, 3, 8, 7, 2, 5, 3,\n", " 6, 5, 3, 4, 4, 8, 2, 5, 4, 2, 9, 2, 3, 2, 2, 3, 1, 4, 3, 0, 8, 8,\n", " 4, 7, 3, 3, 5, 5, 3, 2, 2, 6, 6, 6, 5, 2, 8, 4, 5, 5, 4, 5, 7, 4,\n", " 0, 5, 1, 2, 3, 5, 2, 0, 3, 0, 2, 3, 6, 4, 6, 6, 3, 4, 5, 6, 4, 2,\n", " 3, 6, 7, 6, 1, 6, 4, 4, 4, 6, 3, 7])" ] }, "execution_count": 4, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "# Round normal draws (mean 4, sd 2) to whole numbers and clamp negative values to zero.\n", "# Builtin int replaces the np.int alias, which was removed in NumPy 1.24.\n", "age = np.around(np.random.normal(4, 2, noobs)).astype(int)\n", "age[age <= 0] = 0\n", "age" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 7.94, 5.05, 16.15, 15.5 , 12.17, 14.11, 9.51, 15.6 , 12.36,\n", " 19.18, 15.98, 4.85, 19.52, 11.21, 14.08, 14.96, 6.84, 15.76,\n", " 17.78, 21.49, 16.26, 17.82, 7.9 , 18.26, 18.56, 11.95, 11.09,\n", " 15.57, 6.97, 19.78, 18.58, 19.68, 16.92, 3.19, 21.78, 13.49,\n", " 7.29, 11.74, 17.27, 13.89, 15.38, 12.44, 17.66, 12.55, 11.32,\n", " 15.95, 10.89, 22.06, 8.11, 12.89, 14.57, 15.37, 20.71, 16.72,\n", " 20.06, 16.1 , 16.87, 9.41, 19.5 , 16.18, 18.06, 15.5 , 14.61,\n", " 4.82, 10.92, 9.79, 21.48, 16.14, 15.26, 16.36, 15.49, 8.84,\n", " 13.85, 17.1 , 16.12, 3.67, 14.98, 8.28, 16.25, 21.81, 21.88,\n", " 14.21, 5.05, 19.55, 6.6 , 13.39, 20.9 , 8.02, 19.38, 16.52,\n", " 9.02, 13.38, 21.5 , 10.55, 11. , 20.76, 17.4 , 15.6 , 8.37,\n", " 9.09])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Normal draws (mean 15, sd 5); np.absolute folds any negative draw back to a positive value.\n", "weight = np.random.normal(15, 5, noobs)\n", "weight = np.absolute(weight)\n", "weight = np.around(weight, 2)\n", "weight" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([49.6, 63.5, 68.2, 64.7, 59.7, 26.9, 49.2, 49.9, 48.1, 54.2, 57.2,\n", " 56.7, 43. , 57.2, 47.7, 59.3, 42.5, 46.8, 39.8, 59.5, 42.8, 45.1,\n", " 44.1, 50.4, 61. , 54.9, 46.1, 48. , 40.1, 49. , 48.2, 50.3, 39.9,\n", " 47.9, 41.6, 41. , 55.8, 45.2, 55.2, 47.1, 47.6, 55.9, 37.2, 58.6,\n", " 48.3, 56. , 47.6, 57. , 34.4, 36.5, 68.4, 39.1, 45.8, 48.3, 61.3,\n", " 53.4, 51.3, 48.6, 51.9, 36.9, 41.9, 66.8, 55.5, 44.9, 43.2, 52.3,\n", " 17.2, 55.8, 63.5, 55.2, 41.9, 53.4, 36. , 60.3, 28.4, 46.9, 46.7,\n", " 41.3, 44.6, 52. , 43. , 63.4, 55.7, 41. , 48.1, 65.1, 52. 
, 48.2,\n", " 63.5, 51.5, 58.3, 63.3, 55.7, 44.6, 43.7, 52.2, 47.5, 51.4, 54.1,\n", " 50.3])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "height = np.random.normal(50, 10, noobs)\n", "height = np.absolute(height)\n", "height = np.around(height, 1)\n", "height" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$ l = 5 + 1.2\\,(1 - p) + \\frac{|40.0 - w|^2}{100} + \\varepsilon, \\qquad \\varepsilon \\sim \\mathcal{N}(0,\\, 0.1^2) $$\n", "\n", "where $p$ is 1 for a pure-bred dog and 0 otherwise and $w$ is the weight; the result is rounded to the nearest integer. Height does not enter the model." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([15, 17, 11, 12, 14, 12, 15, 11, 13, 10, 12, 17, 10, 14, 12, 11, 16,\n", " 12, 11, 10, 12, 11, 15, 11, 11, 14, 15, 11, 17, 10, 10, 11, 11, 19,\n", " 8, 13, 16, 14, 10, 12, 12, 14, 10, 13, 14, 11, 15, 10, 15, 12, 13,\n", " 12, 10, 11, 10, 11, 10, 14, 10, 11, 11, 11, 13, 19, 14, 14, 8, 12,\n", " 11, 11, 11, 16, 13, 12, 12, 18, 12, 16, 11, 8, 8, 12, 18, 9, 16,\n", " 13, 9, 16, 9, 11, 16, 13, 8, 14, 15, 10, 11, 12, 16, 14])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Baseline of 5 with small Gaussian noise (sd 0.1).\n", "base = np.random.normal(5, 0.1, noobs)\n", "# +1.2 for every dog that is NOT pure-bred.\n", "mixed_bonus = (~pure).astype(int) * 1.2\n", "# Grows with the squared distance of the weight from 40.\n", "weight_term = np.absolute(40.0 - weight)**2 / 100.0\n", "# Builtin int replaces the np.int alias, which was removed in NumPy 1.24.\n", "lifespan = np.around(base + mixed_bonus + weight_term).astype(int)\n", "lifespan" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pureageweightheightlifespan
0True37.9449.615
1True75.0563.517
2True616.1568.211
3False315.5064.712
4False312.1759.714
..................
95False420.7652.210
96False417.4047.511
97False615.6051.412
98False38.3754.116
99True79.0950.314
\n", "

100 rows × 5 columns

\n", "
" ], "text/plain": [ " pure age weight height lifespan\n", "0 True 3 7.94 49.6 15\n", "1 True 7 5.05 63.5 17\n", "2 True 6 16.15 68.2 11\n", "3 False 3 15.50 64.7 12\n", "4 False 3 12.17 59.7 14\n", ".. ... ... ... ... ...\n", "95 False 4 20.76 52.2 10\n", "96 False 4 17.40 47.5 11\n", "97 False 6 15.60 51.4 12\n", "98 False 3 8.37 54.1 16\n", "99 True 7 9.09 50.3 14\n", "\n", "[100 rows x 5 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Collect the generated arrays into one table, one row per observation.\n", "columns = {'pure': pure, 'age': age, 'weight': weight, 'height': height, 'lifespan': lifespan}\n", "df = pd.DataFrame(columns)\n", "df" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Write the table to disk; with the default arguments the row index is saved as the first column.\n", "df.to_csv(\"data/dogs.csv\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## End" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.5" } }, "nbformat": 4, "nbformat_minor": 2 }