{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "metadata": { "id": "HpQOV466TyqP" }, "source": [ "# Pandas \n", "PanDas = Panel Data Sets\n", "Para trabajar con datos multidimensionales habituales en estadística, econometría, ... \n", "Permite manipular tablas de datos (**DataFrame**) \n", "Las tablas pueden tener diferentes tipos de datos a diferencia de las matrices de NumPy" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "30wdSAAHTyqT" }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": { "id": "qZlx20VdTyqY" }, "source": [ "## Crear un DataFrame" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "VssZwjhaTyqa" }, "outputs": [], "source": [ "fbk = ['Facebook', 2740, True, 2006]\n", "twt = ['Twitter', 339, False, 2006]\n", "ig = ['Instagram', 1221, True, 2010]\n", "yt = ['YouTube', 2291, False, 2005]\n", "lkn = ['LinkedIn', 727, False, 2003]\n", "wsp = ['WhatsApp', 2000, True, 2009]\n", "tik = ['TikTok', 689, False, 2016]\n", "tel = ['Telegram', 500, False, 2013]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "oesp0wg6Tyqf", "outputId": "5f7ed9cc-fd09-4f94-db3a-2a795ea1fb0b", "colab": { "base_uri": "https://localhost:8080/" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[['Facebook', 2740, True, 2006],\n", " ['Twitter', 339, False, 2006],\n", " ['Instagram', 1221, True, 2010],\n", " ['YouTube', 2291, False, 2005],\n", " ['LinkedIn', 727, False, 2003],\n", " ['WhatsApp', 2000, True, 2009],\n", " ['TikTok', 689, False, 2016],\n", " ['Telegram', 500, False, 2013]]" ] }, "metadata": {}, "execution_count": 3 } ], "source": [ "rrss = [fbk,twt,ig,yt,lkn,wsp,tik,tel] # redes sociales 2021\n", "rrss" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "9OSCemw1Tyqi", "outputId": "5dfdec17-ff7b-4b95-b419-4e0f8b33de59", "colab": { "base_uri": "https://localhost:8080/", "height": 300 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Nombre Usuarios es_FBK Año\n", "0 Facebook 2740 True 2006\n", "1 Twitter 339 False 2006\n", "2 Instagram 1221 True 2010\n", "3 YouTube 2291 False 2005\n", "4 LinkedIn 727 False 2003\n", "5 WhatsApp 2000 True 2009\n", "6 TikTok 689 False 2016\n", "7 Telegram 500 False 2013" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NombreUsuarioses_FBKAño
0Facebook2740True2006
1Twitter339False2006
2Instagram1221True2010
3YouTube2291False2005
4LinkedIn727False2003
5WhatsApp2000True2009
6TikTok689False2016
7Telegram500False2013
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 4 } ], "source": [ "df_rrss = pd.DataFrame(rrss, columns = ['Nombre', 'Usuarios', 'es_FBK', 'Año'])\n", "df_rrss" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "AJgLSgCrTyqk", "outputId": "c0d7f60d-8700-4db3-aaae-d8499a7c6fae", "colab": { "base_uri": "https://localhost:8080/", "height": 206 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Nombre Usuarios es_FBK Año\n", "0 Facebook 2740 True 2006\n", "1 Twitter 339 False 2006\n", "2 Instagram 1221 True 2010\n", "3 YouTube 2291 False 2005\n", "4 LinkedIn 727 False 2003" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NombreUsuarioses_FBKAño
0Facebook2740True2006
1Twitter339False2006
2Instagram1221True2010
3YouTube2291False2005
4LinkedIn727False2003
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 5 } ], "source": [ "df_rrss.head() # muestra solo los primeros registros de la tabla" ] }, { "cell_type": "markdown", "metadata": { "id": "sbE9-pTETyql" }, "source": [ "## Crear un DataFrame vacío" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "AQORtV55Tyqq", "outputId": "948173ed-81ef-4607-90e5-da7a9aae69e8", "colab": { "base_uri": "https://localhost:8080/", "height": 53 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Empty DataFrame\n", "Columns: [Nombre, Usuarios, es_FBK, Año]\n", "Index: []" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NombreUsuarioses_FBKAño
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 6 } ], "source": [ "df_vacio = pd.DataFrame(columns = ['Nombre', 'Usuarios', 'es_FBK', 'Año'])\n", "df_vacio" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "Fn-hQqd0Tyqw", "outputId": "0e6fb09b-8e7a-40a1-9504-79354dbfc994", "colab": { "base_uri": "https://localhost:8080/", "height": 115 } }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ ":1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", " df_vacio = df_vacio.append({'Nombre':'Facebook',\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " Nombre Usuarios es_FBK Año\n", "0 Facebook 2740 True 2006" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NombreUsuarioses_FBKAño
0Facebook2740True2006
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 7 } ], "source": [ "df_vacio = df_vacio.append({'Nombre':'Facebook',\n", " 'Usuarios':2740,\n", " 'es_FBK':True,\n", " 'Año':2006}, ignore_index = True)\n", "\n", "df_vacio" ] }, { "cell_type": "markdown", "metadata": { "id": "jFGiR_11Tyqz" }, "source": [ "En lugar de meter los datos de forma manual lo habitual es pasarle un archivo csv o Excel." ] }, { "cell_type": "markdown", "metadata": { "id": "MTQCK80NTyq2" }, "source": [ "## Seleccionar elementos" ] }, { "cell_type": "markdown", "metadata": { "id": "iQXimEJVTyq2" }, "source": [ "### Seleccionar un elemento" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "74SPaQxZTyq3", "outputId": "00fcd42b-7a06-4710-f2e8-f7c2896fbd4d", "colab": { "base_uri": "https://localhost:8080/", "height": 35 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'Twitter'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 8 } ], "source": [ "# seleccionar un elemento por etiquetas\n", "df_rrss.loc[1, 'Nombre'] # loc de localizador" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "id": "gU361m4mTyq6", "outputId": "31cda16f-4065-47e6-dc6f-706aa4e96d3e", "colab": { "base_uri": "https://localhost:8080/", "height": 35 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'Twitter'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 9 } ], "source": [ "# seleccionar un elemento por número\n", "df_rrss.iloc[1, 0] # iloc de localizador por integer" ] }, { "cell_type": "markdown", "metadata": { "id": "9BGctNa9Tyq8" }, "source": [ "### Seleccionar filas o columnas" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "iKC3PcckTyq9", "outputId": "b797b1d5-16d4-4ecd-e5a2-0593dbae8762", "colab": { "base_uri": "https://localhost:8080/" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 Facebook\n", "1 Twitter\n", "2 Instagram\n", "3 YouTube\n", "4 LinkedIn\n", "5 WhatsApp\n", "6 TikTok\n", "7 Telegram\n", "Name: Nombre, dtype: object" ] }, "metadata": {}, "execution_count": 10 } ], "source": [ "# seleccionar por columna\n", "df_rrss['Nombre']" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "id": "skZR7Db4TyrB", "outputId": "b596bced-c4cb-46ca-fe58-b1545cb558ab", "colab": { "base_uri": "https://localhost:8080/" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Nombre Instagram\n", "Usuarios 1221\n", "es_FBK True\n", "Año 2010\n", "Name: 2, dtype: object" ] }, "metadata": {}, "execution_count": 11 } ], "source": [ "# seleccionar una fila\n", "df_rrss.iloc[2]" ] }, { "cell_type": "markdown", "metadata": { "id": "7TJeVj1pTyrD" }, "source": [ "### Seleccionar según un filtro" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "id": "ARAeB0dITyrF", "outputId": "0acddc38-f9cb-41a8-f133-6498ba8de5da", "colab": { "base_uri": "https://localhost:8080/" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 True\n", "1 False\n", "2 False\n", "3 True\n", "4 False\n", "5 False\n", "6 False\n", "7 False\n", "Name: Usuarios, dtype: bool" ] }, "metadata": {}, "execution_count": 12 } ], "source": [ "# seleccionar por condición booleana\n", "df_rrss['Usuarios'] > 2000 # Obtenemos un vector de True False" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "id": "D-CaYA2aTyrG", "outputId": "5dd2fcd1-af47-4add-d150-083a2443a6c1", "colab": { "base_uri": "https://localhost:8080/", "height": 112 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Nombre Usuarios es_FBK Año\n", "0 Facebook 2740 True 2006\n", "3 YouTube 2291 False 2005" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NombreUsuarioses_FBKAño
0Facebook2740True2006
3YouTube2291False2005
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 13 } ], "source": [ "df_rrss[df_rrss['Usuarios'] > 2000] # Convertimos el vector de True False en valores" ] }, { "cell_type": "markdown", "metadata": { "id": "T1bs9zXTTyrH" }, "source": [ "## Ordenar columnas" ] }, { "cell_type": "markdown", "metadata": { "id": "YafS0gaLTyrI" }, "source": [ "### De forma ascendente (de menor a mayor)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "uZXrq7ZVTyrJ", "outputId": "12b38cc3-e09f-4616-8097-ce64e491549c", "colab": { "base_uri": "https://localhost:8080/", "height": 300 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Nombre Usuarios es_FBK Año\n", "0 Facebook 2740 True 2006\n", "2 Instagram 1221 True 2010\n", "4 LinkedIn 727 False 2003\n", "7 Telegram 500 False 2013\n", "6 TikTok 689 False 2016\n", "1 Twitter 339 False 2006\n", "5 WhatsApp 2000 True 2009\n", "3 YouTube 2291 False 2005" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NombreUsuarioses_FBKAño
0Facebook2740True2006
2Instagram1221True2010
4LinkedIn727False2003
7Telegram500False2013
6TikTok689False2016
1Twitter339False2006
5WhatsApp2000True2009
3YouTube2291False2005
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 14 } ], "source": [ "# ordenando por el nombre\n", "df_rrss.sort_values('Nombre', ascending = True) # el df_rrss no altera su orden" ] }, { "cell_type": "markdown", "metadata": { "id": "r48FfD0HTyrL" }, "source": [ "### De forma descendente (de mayor a menor)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "id": "LB0MGjSlTyrM", "outputId": "ed752ff8-5ee1-41b1-df19-dde2457fe9e9", "colab": { "base_uri": "https://localhost:8080/", "height": 300 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Nombre Usuarios es_FBK Año\n", "0 Facebook 2740 True 2006\n", "3 YouTube 2291 False 2005\n", "5 WhatsApp 2000 True 2009\n", "2 Instagram 1221 True 2010\n", "4 LinkedIn 727 False 2003\n", "6 TikTok 689 False 2016\n", "7 Telegram 500 False 2013\n", "1 Twitter 339 False 2006" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NombreUsuarioses_FBKAño
0Facebook2740True2006
3YouTube2291False2005
5WhatsApp2000True2009
2Instagram1221True2010
4LinkedIn727False2003
6TikTok689False2016
7Telegram500False2013
1Twitter339False2006
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 15 } ], "source": [ "# por la cantidad de usuarios\n", "df_rrss.sort_values('Usuarios', ascending = False)" ] }, { "cell_type": "markdown", "metadata": { "id": "Sa9CIngVTyrO" }, "source": [ "### Ordenar por dos columnas" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "id": "MMSgbXkeTyrP", "outputId": "54442176-7d93-4a34-fdaf-40b4febd156d", "colab": { "base_uri": "https://localhost:8080/", "height": 300 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Nombre Usuarios es_FBK Año\n", "4 LinkedIn 727 False 2003\n", "3 YouTube 2291 False 2005\n", "0 Facebook 2740 True 2006\n", "1 Twitter 339 False 2006\n", "5 WhatsApp 2000 True 2009\n", "2 Instagram 1221 True 2010\n", "7 Telegram 500 False 2013\n", "6 TikTok 689 False 2016" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NombreUsuarioses_FBKAño
4LinkedIn727False2003
3YouTube2291False2005
0Facebook2740True2006
1Twitter339False2006
5WhatsApp2000True2009
2Instagram1221True2010
7Telegram500False2013
6TikTok689False2016
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 16 } ], "source": [ "# ordenamos primero por el año y luego por el número de usuarios\n", "df_rrss.sort_values(['Año', 'Usuarios'], ascending = [True, False])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.11" }, "colab": { "name": "0110_pandas.ipynb", "provenance": [], "include_colab_link": true } }, "nbformat": 4, "nbformat_minor": 0 }