{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DataFrame Creation\n",
    "\n",
    "In this notebook, we will learn to create new ```DataFrame``` object from other data structures( e.g.,numpy array and dictionary) and convert data frame to numpy array and dictionary. The defult setting for pandas ```DataFrame``` is \n",
    "\n",
    "```pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False)```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "sns.set()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1. To create new ```DataFrame``` from Numpy array. \n",
    "\n",
    "Let's create a random array of size(100,20) and random column names. We will use these array and column names to create the ```DataFrame``` in next step."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random as random\n",
    "A = np.random.rand(100,10)\n",
    "letter = ['A','B','C','D','E','F','G','H','X']\n",
    "\n",
    "def namer(n):\n",
    "    col_names = [ random.choice(letter)\\\n",
    "             +random.choice(letter)\\\n",
    "             +random.choice(letter)\\\n",
    "             +random.choice(letter) for i in range(n)]\n",
    "    return col_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['HHEE', 'FEGD', 'BFHC', 'HXFC', 'CBDF', 'DEDH', 'CBCX', 'XGXB', 'GCBC', 'FDEE']\n"
     ]
    }
   ],
   "source": [
    "print(namer(A.shape[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAGF</th>\n",
       "      <th>XBGA</th>\n",
       "      <th>DXAC</th>\n",
       "      <th>XEDB</th>\n",
       "      <th>EDCG</th>\n",
       "      <th>ABDH</th>\n",
       "      <th>GFHX</th>\n",
       "      <th>FAAB</th>\n",
       "      <th>BBEC</th>\n",
       "      <th>DGDC</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.285058</td>\n",
       "      <td>0.456733</td>\n",
       "      <td>0.841292</td>\n",
       "      <td>0.397957</td>\n",
       "      <td>0.888982</td>\n",
       "      <td>0.970782</td>\n",
       "      <td>0.379687</td>\n",
       "      <td>0.565177</td>\n",
       "      <td>0.657939</td>\n",
       "      <td>0.461385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.725005</td>\n",
       "      <td>0.432756</td>\n",
       "      <td>0.037801</td>\n",
       "      <td>0.020203</td>\n",
       "      <td>0.590901</td>\n",
       "      <td>0.590571</td>\n",
       "      <td>0.623529</td>\n",
       "      <td>0.166581</td>\n",
       "      <td>0.179392</td>\n",
       "      <td>0.454290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.668743</td>\n",
       "      <td>0.352332</td>\n",
       "      <td>0.642905</td>\n",
       "      <td>0.427461</td>\n",
       "      <td>0.025124</td>\n",
       "      <td>0.365414</td>\n",
       "      <td>0.609983</td>\n",
       "      <td>0.568686</td>\n",
       "      <td>0.522738</td>\n",
       "      <td>0.525048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.807165</td>\n",
       "      <td>0.827478</td>\n",
       "      <td>0.542088</td>\n",
       "      <td>0.628743</td>\n",
       "      <td>0.616745</td>\n",
       "      <td>0.386370</td>\n",
       "      <td>0.632225</td>\n",
       "      <td>0.387794</td>\n",
       "      <td>0.618686</td>\n",
       "      <td>0.786503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.645945</td>\n",
       "      <td>0.136078</td>\n",
       "      <td>0.769546</td>\n",
       "      <td>0.721885</td>\n",
       "      <td>0.107891</td>\n",
       "      <td>0.128859</td>\n",
       "      <td>0.938451</td>\n",
       "      <td>0.875492</td>\n",
       "      <td>0.647702</td>\n",
       "      <td>0.148635</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       AAGF      XBGA      DXAC      XEDB      EDCG      ABDH      GFHX  \\\n",
       "0  0.285058  0.456733  0.841292  0.397957  0.888982  0.970782  0.379687   \n",
       "1  0.725005  0.432756  0.037801  0.020203  0.590901  0.590571  0.623529   \n",
       "2  0.668743  0.352332  0.642905  0.427461  0.025124  0.365414  0.609983   \n",
       "3  0.807165  0.827478  0.542088  0.628743  0.616745  0.386370  0.632225   \n",
       "4  0.645945  0.136078  0.769546  0.721885  0.107891  0.128859  0.938451   \n",
       "\n",
       "       FAAB      BBEC      DGDC  \n",
       "0  0.565177  0.657939  0.461385  \n",
       "1  0.166581  0.179392  0.454290  \n",
       "2  0.568686  0.522738  0.525048  \n",
       "3  0.387794  0.618686  0.786503  \n",
       "4  0.875492  0.647702  0.148635  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(A, columns = col_names )\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- To save data from ```new DataFrame``` to a file:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv('data/test.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 2. To create new  ```DataFrame``` from  list of dictionaries.\n",
    "\n",
    "Here we will create a list with collection of dictionaries. Each of the dictionary will have keys and values. Using this list of dictionaries, we will create another ```DataFrame```. The keys of the dictionary will serve as the column names."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "LD = []\n",
    "for i in range(100):\n",
    "    LD.append({'Player' : namer(1)[0],\\\n",
    "               'game1' : random.uniform(0,1),\\\n",
    "               'game2' : random.uniform(0,1),\\\n",
    "               'game3' : random.uniform(0,1),\n",
    "               'game4' : random.uniform(0,1),\n",
    "               'game5' : random.uniform(0,1)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Player': 'BGXB',\n",
       " 'game1': 0.2965944756471328,\n",
       " 'game2': 0.11334763879800447,\n",
       " 'game3': 0.028543866127768824,\n",
       " 'game4': 0.225405432495144,\n",
       " 'game5': 0.05423542200055986}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "LD[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "DF = pd.DataFrame(LD)\n",
    "DF=DF.set_index(\"Player\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>game1</th>\n",
       "      <th>game2</th>\n",
       "      <th>game3</th>\n",
       "      <th>game4</th>\n",
       "      <th>game5</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Player</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>BGXB</th>\n",
       "      <td>0.296594</td>\n",
       "      <td>0.113348</td>\n",
       "      <td>0.028544</td>\n",
       "      <td>0.225405</td>\n",
       "      <td>0.054235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DBDB</th>\n",
       "      <td>0.047226</td>\n",
       "      <td>0.107065</td>\n",
       "      <td>0.801571</td>\n",
       "      <td>0.816877</td>\n",
       "      <td>0.556934</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AXXH</th>\n",
       "      <td>0.862611</td>\n",
       "      <td>0.439051</td>\n",
       "      <td>0.083341</td>\n",
       "      <td>0.389785</td>\n",
       "      <td>0.258748</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BXED</th>\n",
       "      <td>0.643533</td>\n",
       "      <td>0.082176</td>\n",
       "      <td>0.167241</td>\n",
       "      <td>0.405304</td>\n",
       "      <td>0.088063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>FXGE</th>\n",
       "      <td>0.279076</td>\n",
       "      <td>0.000998</td>\n",
       "      <td>0.949414</td>\n",
       "      <td>0.303408</td>\n",
       "      <td>0.009342</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AHDC</th>\n",
       "      <td>0.617194</td>\n",
       "      <td>0.272401</td>\n",
       "      <td>0.252663</td>\n",
       "      <td>0.788798</td>\n",
       "      <td>0.130996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CXGA</th>\n",
       "      <td>0.104552</td>\n",
       "      <td>0.895106</td>\n",
       "      <td>0.414877</td>\n",
       "      <td>0.167643</td>\n",
       "      <td>0.454175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BDBA</th>\n",
       "      <td>0.045650</td>\n",
       "      <td>0.926742</td>\n",
       "      <td>0.454097</td>\n",
       "      <td>0.055006</td>\n",
       "      <td>0.939082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HBDC</th>\n",
       "      <td>0.069192</td>\n",
       "      <td>0.797224</td>\n",
       "      <td>0.943648</td>\n",
       "      <td>0.567334</td>\n",
       "      <td>0.044285</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EBBX</th>\n",
       "      <td>0.383365</td>\n",
       "      <td>0.852788</td>\n",
       "      <td>0.679330</td>\n",
       "      <td>0.418570</td>\n",
       "      <td>0.817291</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           game1     game2     game3     game4     game5\n",
       "Player                                                  \n",
       "BGXB    0.296594  0.113348  0.028544  0.225405  0.054235\n",
       "DBDB    0.047226  0.107065  0.801571  0.816877  0.556934\n",
       "AXXH    0.862611  0.439051  0.083341  0.389785  0.258748\n",
       "BXED    0.643533  0.082176  0.167241  0.405304  0.088063\n",
       "FXGE    0.279076  0.000998  0.949414  0.303408  0.009342\n",
       "AHDC    0.617194  0.272401  0.252663  0.788798  0.130996\n",
       "CXGA    0.104552  0.895106  0.414877  0.167643  0.454175\n",
       "BDBA    0.045650  0.926742  0.454097  0.055006  0.939082\n",
       "HBDC    0.069192  0.797224  0.943648  0.567334  0.044285\n",
       "EBBX    0.383365  0.852788  0.679330  0.418570  0.817291"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "DF.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 3. To create ```DataFrame``` from a List :"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.816389</td>\n",
       "      <td>0.212530</td>\n",
       "      <td>0.705185</td>\n",
       "      <td>0.225743</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.646496</td>\n",
       "      <td>0.876869</td>\n",
       "      <td>0.648350</td>\n",
       "      <td>0.788687</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.869173</td>\n",
       "      <td>0.832004</td>\n",
       "      <td>0.516009</td>\n",
       "      <td>0.917783</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.668866</td>\n",
       "      <td>0.778850</td>\n",
       "      <td>0.834528</td>\n",
       "      <td>0.243842</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.742311</td>\n",
       "      <td>0.984313</td>\n",
       "      <td>0.872512</td>\n",
       "      <td>0.451476</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C         D\n",
       "0  0.816389  0.212530  0.705185  0.225743\n",
       "1  0.646496  0.876869  0.648350  0.788687\n",
       "2  0.869173  0.832004  0.516009  0.917783\n",
       "3  0.668866  0.778850  0.834528  0.243842\n",
       "4  0.742311  0.984313  0.872512  0.451476"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A = [random.uniform(0,1)for i in range(10)]\n",
    "B = [random.uniform(0,1)for i in range(10)]\n",
    "C = [random.uniform(0,1)for i in range(10)]\n",
    "D = [random.uniform(0,1)for i in range(10)]\n",
    "\n",
    "df = pd.DataFrame()\n",
    "df['A'],df['B'],df['C'],df['D'] = A,B,C,D\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### References:\n",
    "1. [Pydata document for Styling DataFrame visualization](https://pandas.pydata.org/docs/user_guide/style.html)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}