{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyOfQRQnFP+0ZczRqgkkpBXb",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
" "
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nzIg9WBmIlGJ",
"outputId": "fc93079a-f838-4a5b-83d2-a13dd56d980e"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[1, 2, 3],\n",
" [4, 5, 6]])"
]
},
"metadata": {},
"execution_count": 130
}
],
"source": [
"#CRUD data frame and management \n",
"\n",
"\n",
"\n",
"import pandas as pd\n",
"import numpy as np \n",
"\n",
"\n",
"data = np.array([[1,2,3],[4,5,6]])\n",
"data \n",
"\n"
]
},
{
"cell_type": "code",
"source": [
"r = np.array(['r1','r2'])\n",
"c = np.array(['c1','c2','c3'])\n",
"df = pd.DataFrame(data = data\n",
" ,index=r \n",
" ,columns =c)\n",
"\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 112
},
"id": "aOs0tEwbI7Nx",
"outputId": "19db63d9-a6e9-4f4f-8f70-8a99ad4c7423"
},
"execution_count": 131,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" c1 c2 c3\n",
"r1 1 2 3\n",
"r2 4 5 6"
],
"text/html": [
"\n",
"
\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" c1 \n",
" c2 \n",
" c3 \n",
" \n",
" \n",
" \n",
" \n",
" r1 \n",
" 1 \n",
" 2 \n",
" 3 \n",
" \n",
" \n",
" r2 \n",
" 4 \n",
" 5 \n",
" 6 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 131
}
]
},
{
"cell_type": "code",
"source": [
"my_df = pd.DataFrame(data = [4,5,6,7],index = range(0,4),columns =['A'])\n",
"print(pd.DataFrame(my_df))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Oxw_M2THL0g-",
"outputId": "e5a8be5e-8209-4b52-c3cd-9530a1e57a94"
},
"execution_count": 132,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" A\n",
"0 4\n",
"1 5\n",
"2 6\n",
"3 7\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"#create data\n",
"\n",
"#Take a Serires as input to your DataFrame \n",
"\n",
"my_series = pd.Series({'United Kingdom':'London' , 'India':'New Delhi',\n",
" 'United States':'Washingtod', 'Belguim':'Burssels'})\n",
"\n",
"print(pd.DataFrame(my_series))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "A-eza_t5Ke74",
"outputId": "65e472d8-83bd-4b9b-9ef4-79b960466350"
},
"execution_count": 133,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" 0\n",
"United Kingdom London\n",
"India New Delhi\n",
"United States Washingtod\n",
"Belguim Burssels\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"#Data Selection with Indexing\n",
"\n",
"import pandas as pd \n",
"import numpy as np \n",
"from numpy.random import randn\n",
"\n",
"np.random.seed(101)"
],
"metadata": {
"id": "le6cNuL5MgK3"
},
"execution_count": 134,
"outputs": []
},
{
"cell_type": "code",
"source": [
"arr = randn(5,4)\n"
],
"metadata": {
"id": "L2cK4KXgM6SY"
},
"execution_count": 135,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df = pd.DataFrame(arr,index='A B C D E'.split(),columns = 'W X Y Z'.split())\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "W4BD7D0wNYYW",
"outputId": "f8310152-4248-4fb7-963f-d279da7c8147"
},
"execution_count": 136,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" W X Y Z\n",
"A 2.706850 0.628133 0.907969 0.503826\n",
"B 0.651118 -0.319318 -0.848077 0.605965\n",
"C -2.018168 0.740122 0.528813 -0.589001\n",
"D 0.188695 -0.758872 -0.933237 0.955057\n",
"E 0.190794 1.978757 2.605967 0.683509"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" W \n",
" X \n",
" Y \n",
" Z \n",
" \n",
" \n",
" \n",
" \n",
" A \n",
" 2.706850 \n",
" 0.628133 \n",
" 0.907969 \n",
" 0.503826 \n",
" \n",
" \n",
" B \n",
" 0.651118 \n",
" -0.319318 \n",
" -0.848077 \n",
" 0.605965 \n",
" \n",
" \n",
" C \n",
" -2.018168 \n",
" 0.740122 \n",
" 0.528813 \n",
" -0.589001 \n",
" \n",
" \n",
" D \n",
" 0.188695 \n",
" -0.758872 \n",
" -0.933237 \n",
" 0.955057 \n",
" \n",
" \n",
" E \n",
" 0.190794 \n",
" 1.978757 \n",
" 2.605967 \n",
" 0.683509 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 136
}
]
},
{
"cell_type": "code",
"source": [
"#Column Selection with Indexing \n",
"df['W']"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YzSX9c0sOSJl",
"outputId": "6013477e-dff5-4d89-971f-702897501ef8"
},
"execution_count": 137,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"A 2.706850\n",
"B 0.651118\n",
"C -2.018168\n",
"D 0.188695\n",
"E 0.190794\n",
"Name: W, dtype: float64"
]
},
"metadata": {},
"execution_count": 137
}
]
},
{
"cell_type": "code",
"source": [
"#Pass a list of column names\n",
"df[['W','Z']]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "CHGaf8rbObrm",
"outputId": "873b08de-1431-48bd-ab76-35253aab5975"
},
"execution_count": 138,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" W Z\n",
"A 2.706850 0.503826\n",
"B 0.651118 0.605965\n",
"C -2.018168 -0.589001\n",
"D 0.188695 0.955057\n",
"E 0.190794 0.683509"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" W \n",
" Z \n",
" \n",
" \n",
" \n",
" \n",
" A \n",
" 2.706850 \n",
" 0.503826 \n",
" \n",
" \n",
" B \n",
" 0.651118 \n",
" 0.605965 \n",
" \n",
" \n",
" C \n",
" -2.018168 \n",
" -0.589001 \n",
" \n",
" \n",
" D \n",
" 0.188695 \n",
" 0.955057 \n",
" \n",
" \n",
" E \n",
" 0.190794 \n",
" 0.683509 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 138
}
]
},
{
"cell_type": "code",
"source": [
"#Create New Series add column!!!\n",
"df['new'] = df['W']+df['Y']\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "NKGOZiXFOp4c",
"outputId": "cc4738f1-0028-4874-e48a-0007e11e5b24"
},
"execution_count": 139,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" W X Y Z new\n",
"A 2.706850 0.628133 0.907969 0.503826 3.614819\n",
"B 0.651118 -0.319318 -0.848077 0.605965 -0.196959\n",
"C -2.018168 0.740122 0.528813 -0.589001 -1.489355\n",
"D 0.188695 -0.758872 -0.933237 0.955057 -0.744542\n",
"E 0.190794 1.978757 2.605967 0.683509 2.796762"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" W \n",
" X \n",
" Y \n",
" Z \n",
" new \n",
" \n",
" \n",
" \n",
" \n",
" A \n",
" 2.706850 \n",
" 0.628133 \n",
" 0.907969 \n",
" 0.503826 \n",
" 3.614819 \n",
" \n",
" \n",
" B \n",
" 0.651118 \n",
" -0.319318 \n",
" -0.848077 \n",
" 0.605965 \n",
" -0.196959 \n",
" \n",
" \n",
" C \n",
" -2.018168 \n",
" 0.740122 \n",
" 0.528813 \n",
" -0.589001 \n",
" -1.489355 \n",
" \n",
" \n",
" D \n",
" 0.188695 \n",
" -0.758872 \n",
" -0.933237 \n",
" 0.955057 \n",
" -0.744542 \n",
" \n",
" \n",
" E \n",
" 0.190794 \n",
" 1.978757 \n",
" 2.605967 \n",
" 0.683509 \n",
" 2.796762 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 139
}
]
},
{
"cell_type": "code",
"source": [
"#Delets Series\n",
"df2 = df.drop('new',axis = 1)\n",
"df2"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "y6b4_06OO609",
"outputId": "f35cdc5c-c43c-4296-fd74-7b3205b80a00"
},
"execution_count": 140,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" W X Y Z\n",
"A 2.706850 0.628133 0.907969 0.503826\n",
"B 0.651118 -0.319318 -0.848077 0.605965\n",
"C -2.018168 0.740122 0.528813 -0.589001\n",
"D 0.188695 -0.758872 -0.933237 0.955057\n",
"E 0.190794 1.978757 2.605967 0.683509"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" W \n",
" X \n",
" Y \n",
" Z \n",
" \n",
" \n",
" \n",
" \n",
" A \n",
" 2.706850 \n",
" 0.628133 \n",
" 0.907969 \n",
" 0.503826 \n",
" \n",
" \n",
" B \n",
" 0.651118 \n",
" -0.319318 \n",
" -0.848077 \n",
" 0.605965 \n",
" \n",
" \n",
" C \n",
" -2.018168 \n",
" 0.740122 \n",
" 0.528813 \n",
" -0.589001 \n",
" \n",
" \n",
" D \n",
" 0.188695 \n",
" -0.758872 \n",
" -0.933237 \n",
" 0.955057 \n",
" \n",
" \n",
" E \n",
" 0.190794 \n",
" 1.978757 \n",
" 2.605967 \n",
" 0.683509 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 140
}
]
},
{
"cell_type": "code",
"source": [
"# df.drop('New',axis = 1 , inplace = True)\n",
"# df"
],
"metadata": {
"id": "lY_ffqsdRA9e"
},
"execution_count": 141,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# df1=df.drop('E',axis = 0)\n",
"# df1"
],
"metadata": {
"id": "trgBdpTcSUB8"
},
"execution_count": 142,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#Data Manipulation \n",
"\n",
"import pandas as pd\n",
"#Create dataframe \n",
"data = {'Company':['GooG','GooG','MSFT','MSFT','FB','FB'],\n",
" 'Person':['Sam','Charlie','Amy','Venessa','Carl','Sarah'],\n",
" 'Sales':[200,120,340,124,243,350]}\n",
"\n",
"df = pd.DataFrame(data)\n",
"df\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 237
},
"id": "diDjKy2uTOl0",
"outputId": "12f21c51-eed1-48c2-b5e9-b6a76c843cd9"
},
"execution_count": 143,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Company Person Sales\n",
"0 GooG Sam 200\n",
"1 GooG Charlie 120\n",
"2 MSFT Amy 340\n",
"3 MSFT Venessa 124\n",
"4 FB Carl 243\n",
"5 FB Sarah 350"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Company \n",
" Person \n",
" Sales \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" GooG \n",
" Sam \n",
" 200 \n",
" \n",
" \n",
" 1 \n",
" GooG \n",
" Charlie \n",
" 120 \n",
" \n",
" \n",
" 2 \n",
" MSFT \n",
" Amy \n",
" 340 \n",
" \n",
" \n",
" 3 \n",
" MSFT \n",
" Venessa \n",
" 124 \n",
" \n",
" \n",
" 4 \n",
" FB \n",
" Carl \n",
" 243 \n",
" \n",
" \n",
" 5 \n",
" FB \n",
" Sarah \n",
" 350 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 143
}
]
},
{
"cell_type": "code",
"source": [
"#Group by \n",
"by_comp = df.groupby('Company')\n",
"by_comp\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RxCHVSzYT9z8",
"outputId": "0cbb9a0e-19f9-465e-c4c9-c1bbf0adb6ed"
},
"execution_count": 144,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 144
}
]
},
{
"cell_type": "code",
"source": [
"by_comp.mean()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "I0dImO7mUK37",
"outputId": "bbb696ce-4f8c-483b-bd5b-baf8ad439e5a"
},
"execution_count": 145,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Sales\n",
"Company \n",
"FB 296.5\n",
"GooG 160.0\n",
"MSFT 232.0"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Sales \n",
" \n",
" \n",
" Company \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" FB \n",
" 296.5 \n",
" \n",
" \n",
" GooG \n",
" 160.0 \n",
" \n",
" \n",
" MSFT \n",
" 232.0 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 145
}
]
},
{
"cell_type": "code",
"source": [
"df.groupby('Company').mean()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "gjwnNEvKURzB",
"outputId": "ddcd413e-54cf-4027-d3e3-9204e3f5ab72"
},
"execution_count": 146,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Sales\n",
"Company \n",
"FB 296.5\n",
"GooG 160.0\n",
"MSFT 232.0"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Sales \n",
" \n",
" \n",
" Company \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" FB \n",
" 296.5 \n",
" \n",
" \n",
" GooG \n",
" 160.0 \n",
" \n",
" \n",
" MSFT \n",
" 232.0 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 146
}
]
},
{
"cell_type": "code",
"source": [
"#Head and Tail \n",
"import pandas as pd \n",
"import numpy as np \n",
"\n",
"long_series = pd.Series(np.random.randn(1000))\n",
"\n",
"long_series.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SYi6zuNpUZjw",
"outputId": "9b4f5ca1-95c6-46be-afc9-7352bc264d34"
},
"execution_count": 147,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 0.302665\n",
"1 1.693723\n",
"2 -1.706086\n",
"3 -1.159119\n",
"4 -0.134841\n",
"dtype: float64"
]
},
"metadata": {},
"execution_count": 147
}
]
},
{
"cell_type": "code",
"source": [
"long_series.tail(3)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "eSO3g9s-UzdT",
"outputId": "cba583de-6fae-46e5-a83e-616cd112e11f"
},
"execution_count": 148,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"997 -0.824182\n",
"998 -0.553474\n",
"999 0.368730\n",
"dtype: float64"
]
},
"metadata": {},
"execution_count": 148
}
]
},
{
"cell_type": "code",
"source": [
"#Concatenate \n",
"df1 = pd.DataFrame({'A':['A0','A1','A2','A3'],\n",
" 'B':['B0','B1','B2','B3'],\n",
" 'C':['C0','C1','C2','C3'],\n",
" 'D':['D0','D1','D2','D3']},index =[0,1,2,3])\n",
"df1\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "KvP0I04hVKAc",
"outputId": "b4d1b91e-5560-42f3-be8f-d6e22d5a00b1"
},
"execution_count": 149,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" A B C D\n",
"0 A0 B0 C0 D0\n",
"1 A1 B1 C1 D1\n",
"2 A2 B2 C2 D2\n",
"3 A3 B3 C3 D3"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" A \n",
" B \n",
" C \n",
" D \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A0 \n",
" B0 \n",
" C0 \n",
" D0 \n",
" \n",
" \n",
" 1 \n",
" A1 \n",
" B1 \n",
" C1 \n",
" D1 \n",
" \n",
" \n",
" 2 \n",
" A2 \n",
" B2 \n",
" C2 \n",
" D2 \n",
" \n",
" \n",
" 3 \n",
" A3 \n",
" B3 \n",
" C3 \n",
" D3 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 149
}
]
},
{
"cell_type": "code",
"source": [
"df2 = pd.DataFrame({'A':['A4','A5','A6','A7'],\n",
" 'B':['B4','B5','B6','B7'],\n",
" 'C':['C4','C5','C6','C7'],\n",
" 'D':['D4','D5','D6','D7']},index =[0,1,2,3])\n",
"df2"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "4ZdJNOQtV2Bb",
"outputId": "894e2573-6508-4877-b98f-0b83373b97aa"
},
"execution_count": 150,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" A B C D\n",
"0 A4 B4 C4 D4\n",
"1 A5 B5 C5 D5\n",
"2 A6 B6 C6 D6\n",
"3 A7 B7 C7 D7"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" A \n",
" B \n",
" C \n",
" D \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A4 \n",
" B4 \n",
" C4 \n",
" D4 \n",
" \n",
" \n",
" 1 \n",
" A5 \n",
" B5 \n",
" C5 \n",
" D5 \n",
" \n",
" \n",
" 2 \n",
" A6 \n",
" B6 \n",
" C6 \n",
" D6 \n",
" \n",
" \n",
" 3 \n",
" A7 \n",
" B7 \n",
" C7 \n",
" D7 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 150
}
]
},
{
"cell_type": "code",
"source": [
"df3 = pd.DataFrame({'A':['A7','A8','A9','A10'],\n",
" 'B':['B7','B8','B9','B10'],\n",
" 'C':['C7','C8','C9','C10'],\n",
" 'D':['D7','D8','D9','D10']},index =[0,1,2,3])\n",
"df3"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "M0MLQA2sWNZz",
"outputId": "ddecc130-83c5-4252-db8e-8873369b41fa"
},
"execution_count": 151,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" A B C D\n",
"0 A7 B7 C7 D7\n",
"1 A8 B8 C8 D8\n",
"2 A9 B9 C9 D9\n",
"3 A10 B10 C10 D10"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" A \n",
" B \n",
" C \n",
" D \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A7 \n",
" B7 \n",
" C7 \n",
" D7 \n",
" \n",
" \n",
" 1 \n",
" A8 \n",
" B8 \n",
" C8 \n",
" D8 \n",
" \n",
" \n",
" 2 \n",
" A9 \n",
" B9 \n",
" C9 \n",
" D9 \n",
" \n",
" \n",
" 3 \n",
" A10 \n",
" B10 \n",
" C10 \n",
" D10 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 151
}
]
},
{
"cell_type": "code",
"source": [
"pd.concat([df1,df2,df3])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 425
},
"id": "k7MCpUKVWukz",
"outputId": "ddbcc4d5-79d5-4df3-ff67-eacb6a407458"
},
"execution_count": 152,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" A B C D\n",
"0 A0 B0 C0 D0\n",
"1 A1 B1 C1 D1\n",
"2 A2 B2 C2 D2\n",
"3 A3 B3 C3 D3\n",
"0 A4 B4 C4 D4\n",
"1 A5 B5 C5 D5\n",
"2 A6 B6 C6 D6\n",
"3 A7 B7 C7 D7\n",
"0 A7 B7 C7 D7\n",
"1 A8 B8 C8 D8\n",
"2 A9 B9 C9 D9\n",
"3 A10 B10 C10 D10"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" A \n",
" B \n",
" C \n",
" D \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A0 \n",
" B0 \n",
" C0 \n",
" D0 \n",
" \n",
" \n",
" 1 \n",
" A1 \n",
" B1 \n",
" C1 \n",
" D1 \n",
" \n",
" \n",
" 2 \n",
" A2 \n",
" B2 \n",
" C2 \n",
" D2 \n",
" \n",
" \n",
" 3 \n",
" A3 \n",
" B3 \n",
" C3 \n",
" D3 \n",
" \n",
" \n",
" 0 \n",
" A4 \n",
" B4 \n",
" C4 \n",
" D4 \n",
" \n",
" \n",
" 1 \n",
" A5 \n",
" B5 \n",
" C5 \n",
" D5 \n",
" \n",
" \n",
" 2 \n",
" A6 \n",
" B6 \n",
" C6 \n",
" D6 \n",
" \n",
" \n",
" 3 \n",
" A7 \n",
" B7 \n",
" C7 \n",
" D7 \n",
" \n",
" \n",
" 0 \n",
" A7 \n",
" B7 \n",
" C7 \n",
" D7 \n",
" \n",
" \n",
" 1 \n",
" A8 \n",
" B8 \n",
" C8 \n",
" D8 \n",
" \n",
" \n",
" 2 \n",
" A9 \n",
" B9 \n",
" C9 \n",
" D9 \n",
" \n",
" \n",
" 3 \n",
" A10 \n",
" B10 \n",
" C10 \n",
" D10 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 152
}
]
},
{
"cell_type": "code",
"source": [
"pd.concat([df1,df2,df3],axis =1)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "L6AQyEz8XK0B",
"outputId": "b8638641-9bf1-4f1b-aeab-bf1291cdb0df"
},
"execution_count": 153,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" A B C D A B C D A B C D\n",
"0 A0 B0 C0 D0 A4 B4 C4 D4 A7 B7 C7 D7\n",
"1 A1 B1 C1 D1 A5 B5 C5 D5 A8 B8 C8 D8\n",
"2 A2 B2 C2 D2 A6 B6 C6 D6 A9 B9 C9 D9\n",
"3 A3 B3 C3 D3 A7 B7 C7 D7 A10 B10 C10 D10"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" A \n",
" B \n",
" C \n",
" D \n",
" A \n",
" B \n",
" C \n",
" D \n",
" A \n",
" B \n",
" C \n",
" D \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A0 \n",
" B0 \n",
" C0 \n",
" D0 \n",
" A4 \n",
" B4 \n",
" C4 \n",
" D4 \n",
" A7 \n",
" B7 \n",
" C7 \n",
" D7 \n",
" \n",
" \n",
" 1 \n",
" A1 \n",
" B1 \n",
" C1 \n",
" D1 \n",
" A5 \n",
" B5 \n",
" C5 \n",
" D5 \n",
" A8 \n",
" B8 \n",
" C8 \n",
" D8 \n",
" \n",
" \n",
" 2 \n",
" A2 \n",
" B2 \n",
" C2 \n",
" D2 \n",
" A6 \n",
" B6 \n",
" C6 \n",
" D6 \n",
" A9 \n",
" B9 \n",
" C9 \n",
" D9 \n",
" \n",
" \n",
" 3 \n",
" A3 \n",
" B3 \n",
" C3 \n",
" D3 \n",
" A7 \n",
" B7 \n",
" C7 \n",
" D7 \n",
" A10 \n",
" B10 \n",
" C10 \n",
" D10 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 153
}
]
},
{
"cell_type": "code",
"source": [
"left = pd.DataFrame(\n",
" {\n",
" \"key1\": [\"K0\", \"K0\", \"K1\", \"K2\"],\n",
" \"key2\": [\"K0\", \"K1\", \"K0\", \"K1\"],\n",
" \"A\": [\"A0\", \"A1\", \"A2\", \"A3\"],\n",
" \"B\": [\"B0\", \"B1\", \"B2\", \"B3\"],\n",
" }\n",
")"
],
"metadata": {
"id": "D-V1T0Hkag-j"
},
"execution_count": 160,
"outputs": []
},
{
"cell_type": "code",
"source": [
"right = pd.DataFrame(\n",
" {\n",
" \"key1\": [\"K0\", \"K1\", \"K1\", \"K2\"],\n",
" \"key2\": [\"K0\", \"K0\", \"K0\", \"K0\"],\n",
" \"C\": [\"C0\", \"C1\", \"C2\", \"C3\"],\n",
" \"D\": [\"D0\", \"D1\", \"D2\", \"D3\"],\n",
" }\n",
")"
],
"metadata": {
"id": "J1PJ9DhpahDw"
},
"execution_count": 161,
"outputs": []
},
{
"cell_type": "code",
"source": [
"result = pd.merge(left, right, on=[\"key1\", \"key2\"])\n",
"result"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 143
},
"id": "WXD1FBy9anCA",
"outputId": "f357fe1e-3210-469f-c990-8ea6ef176cd9"
},
"execution_count": 162,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" key1 key2 A B C D\n",
"0 K0 K0 A0 B0 C0 D0\n",
"1 K1 K0 A2 B2 C1 D1\n",
"2 K1 K0 A2 B2 C2 D2"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" key1 \n",
" key2 \n",
" A \n",
" B \n",
" C \n",
" D \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" K0 \n",
" K0 \n",
" A0 \n",
" B0 \n",
" C0 \n",
" D0 \n",
" \n",
" \n",
" 1 \n",
" K1 \n",
" K0 \n",
" A2 \n",
" B2 \n",
" C1 \n",
" D1 \n",
" \n",
" \n",
" 2 \n",
" K1 \n",
" K0 \n",
" A2 \n",
" B2 \n",
" C2 \n",
" D2 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 162
}
]
},
{
"cell_type": "code",
"source": [
"result = pd.merge(left, right, how=\"left\", on=[\"key1\", \"key2\"])\n",
"result"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "ae3yUG4JbPNR",
"outputId": "9d338233-6f68-46ce-9d4d-ccfe18f5f289"
},
"execution_count": 163,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" key1 key2 A B C D\n",
"0 K0 K0 A0 B0 C0 D0\n",
"1 K0 K1 A1 B1 NaN NaN\n",
"2 K1 K0 A2 B2 C1 D1\n",
"3 K1 K0 A2 B2 C2 D2\n",
"4 K2 K1 A3 B3 NaN NaN"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" key1 \n",
" key2 \n",
" A \n",
" B \n",
" C \n",
" D \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" K0 \n",
" K0 \n",
" A0 \n",
" B0 \n",
" C0 \n",
" D0 \n",
" \n",
" \n",
" 1 \n",
" K0 \n",
" K1 \n",
" A1 \n",
" B1 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 2 \n",
" K1 \n",
" K0 \n",
" A2 \n",
" B2 \n",
" C1 \n",
" D1 \n",
" \n",
" \n",
" 3 \n",
" K1 \n",
" K0 \n",
" A2 \n",
" B2 \n",
" C2 \n",
" D2 \n",
" \n",
" \n",
" 4 \n",
" K2 \n",
" K1 \n",
" A3 \n",
" B3 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 163
}
]
},
{
"cell_type": "code",
"source": [
"result = pd.merge(left, right, how=\"outer\", on=[\"key1\", \"key2\"])\n",
"result"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 237
},
"id": "uwddfBAqbbvR",
"outputId": "d59b2438-439c-4a21-ddbb-050d5fd014fb"
},
"execution_count": 164,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" key1 key2 A B C D\n",
"0 K0 K0 A0 B0 C0 D0\n",
"1 K0 K1 A1 B1 NaN NaN\n",
"2 K1 K0 A2 B2 C1 D1\n",
"3 K1 K0 A2 B2 C2 D2\n",
"4 K2 K1 A3 B3 NaN NaN\n",
"5 K2 K0 NaN NaN C3 D3"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" key1 \n",
" key2 \n",
" A \n",
" B \n",
" C \n",
" D \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" K0 \n",
" K0 \n",
" A0 \n",
" B0 \n",
" C0 \n",
" D0 \n",
" \n",
" \n",
" 1 \n",
" K0 \n",
" K1 \n",
" A1 \n",
" B1 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 2 \n",
" K1 \n",
" K0 \n",
" A2 \n",
" B2 \n",
" C1 \n",
" D1 \n",
" \n",
" \n",
" 3 \n",
" K1 \n",
" K0 \n",
" A2 \n",
" B2 \n",
" C2 \n",
" D2 \n",
" \n",
" \n",
" 4 \n",
" K2 \n",
" K1 \n",
" A3 \n",
" B3 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 5 \n",
" K2 \n",
" K0 \n",
" NaN \n",
" NaN \n",
" C3 \n",
" D3 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 164
}
]
},
{
"cell_type": "code",
"source": [
"raw_data ={'first_name':['Jason',np.nan,'Tina','Jake','Amy'],\n",
" 'last_name': ['Miller',np.nan,'Ali','Miler','Cooze'],\n",
" 'age':[42,np.nan,36,24,73],\n",
" 'sex':['m',np.nan,'f','m','f'],\n",
" 'preTestScore':[4,np.nan ,np.nan,2,3],\n",
" 'postTestScore':[25,np.nan,np.nan,62,70]}\n",
"df = pd.DataFrame(raw_data,columns = ['first_name','last_name','age',\n",
" 'sex','preTestscore','postTestscore'])\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "MUlTFEt-b6Qh",
"outputId": "44fd866d-25bc-4962-d7cc-e6214566ba5f"
},
"execution_count": 170,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" first_name last_name age sex preTestscore postTestscore\n",
"0 Jason Miller 42.0 m NaN NaN\n",
"1 NaN NaN NaN NaN NaN NaN\n",
"2 Tina Ali 36.0 f NaN NaN\n",
"3 Jake Miler 24.0 m NaN NaN\n",
"4 Amy Cooze 73.0 f NaN NaN"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" first_name \n",
" last_name \n",
" age \n",
" sex \n",
" preTestscore \n",
" postTestscore \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Jason \n",
" Miller \n",
" 42.0 \n",
" m \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 1 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 2 \n",
" Tina \n",
" Ali \n",
" 36.0 \n",
" f \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 3 \n",
" Jake \n",
" Miler \n",
" 24.0 \n",
" m \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 4 \n",
" Amy \n",
" Cooze \n",
" 73.0 \n",
" f \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 170
}
]
},
{
"cell_type": "code",
"source": [
"df_no_missing = df.dropna()\n",
"df_no_missing"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49
},
"id": "E_9X3OM6dqDo",
"outputId": "0d9b1cb4-ca08-4c04-d3c4-235cdcdc452e"
},
"execution_count": 171,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Empty DataFrame\n",
"Columns: [first_name, last_name, age, sex, preTestscore, postTestscore]\n",
"Index: []"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" first_name \n",
" last_name \n",
" age \n",
" sex \n",
" preTestscore \n",
" postTestscore \n",
" \n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 171
}
]
},
{
"cell_type": "code",
"source": [
"df_cleaned = df.dropna(how = 'all')\n",
"df_cleaned"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "r5TKxrrXezbq",
"outputId": "cfec62df-9188-4d0b-a78c-bb7512790670"
},
"execution_count": 172,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" first_name last_name age sex preTestscore postTestscore\n",
"0 Jason Miller 42.0 m NaN NaN\n",
"2 Tina Ali 36.0 f NaN NaN\n",
"3 Jake Miler 24.0 m NaN NaN\n",
"4 Amy Cooze 73.0 f NaN NaN"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" first_name \n",
" last_name \n",
" age \n",
" sex \n",
" preTestscore \n",
" postTestscore \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Jason \n",
" Miller \n",
" 42.0 \n",
" m \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 2 \n",
" Tina \n",
" Ali \n",
" 36.0 \n",
" f \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 3 \n",
" Jake \n",
" Miler \n",
" 24.0 \n",
" m \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 4 \n",
" Amy \n",
" Cooze \n",
" 73.0 \n",
" f \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 172
}
]
},
{
"cell_type": "code",
"source": [
"#fillna()\n",
"df.fillna(0)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "Wb5UB-3HfDnu",
"outputId": "d71a1949-1eb5-420c-b388-71d8a5459cb9"
},
"execution_count": 177,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" first_name last_name age sex preTestscore postTestscore\n",
"0 Jason Miller 42.0 m 0 0\n",
"1 0 0 0.0 0 0 0\n",
"2 Tina Ali 36.0 f 0 0\n",
"3 Jake Miler 24.0 m 0 0\n",
"4 Amy Cooze 73.0 f 0 0"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" first_name \n",
" last_name \n",
" age \n",
" sex \n",
" preTestscore \n",
" postTestscore \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Jason \n",
" Miller \n",
" 42.0 \n",
" m \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0.0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 2 \n",
" Tina \n",
" Ali \n",
" 36.0 \n",
" f \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 3 \n",
" Jake \n",
" Miler \n",
" 24.0 \n",
" m \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 4 \n",
" Amy \n",
" Cooze \n",
" 73.0 \n",
" f \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 177
}
]
},
{
"cell_type": "code",
"source": [
"df['preTestscore'].fillna(df['preTestscore'].mean(),inplace =True)\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "n2TXOM9qftOe",
"outputId": "38d04e2b-d927-444f-ad25-7f109bac4f19"
},
"execution_count": 179,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" first_name last_name age sex preTestscore postTestscore\n",
"0 Jason Miller 42.0 m NaN NaN\n",
"1 NaN NaN NaN NaN NaN NaN\n",
"2 Tina Ali 36.0 f NaN NaN\n",
"3 Jake Miler 24.0 m NaN NaN\n",
"4 Amy Cooze 73.0 f NaN NaN"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" first_name \n",
" last_name \n",
" age \n",
" sex \n",
" preTestscore \n",
" postTestscore \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Jason \n",
" Miller \n",
" 42.0 \n",
" m \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 1 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 2 \n",
" Tina \n",
" Ali \n",
" 36.0 \n",
" f \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 3 \n",
" Jake \n",
" Miler \n",
" 24.0 \n",
" m \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 4 \n",
" Amy \n",
" Cooze \n",
" 73.0 \n",
" f \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 179
}
]
}
]
}