{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" }, "colab": { "name": "kl_py_pandas_02.ipynb", "provenance": [], "collapsed_sections": [], "include_colab_link": true } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "metadata": { "id": "Bw4GE_218lYe", "colab_type": "text" }, "source": [ "

\n", " \n", " \n", "

\n", "\n", "\n", "

\n", "\n", "# Pandas bevezető 2.\n", "\n", "https://klajosw.blogspot.com/\n", "\n", "https://github.com/klajosw/\n", "\n", "pandas: NumPy-ra épülő adatfeldolgozó és elemző eszköz\n", "\n", "---" ] }, { "cell_type": "code", "metadata": { "id": "LrahWQTJ8lYf", "colab_type": "code", "colab": {} }, "source": [ "from datetime import datetime\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "6ey1dG8c8lYj", "colab_type": "code", "outputId": "497dbe32-7e55-4a55-e820-131b5165f9cd", "colab": { "base_uri": "https://localhost:8080/", "height": 334 } }, "source": [ "# DataFrame-et létre lehet hozni szótárból...\n", "data1 = {\"a\": [1, 1, 2], \"b\": [3.0, 4.0, None]}\n", "df1 = pd.DataFrame(data1)\n", "print('Szótárból: ')\n", "print(df1)\n", "print('-----------------')\n", "\n", "# ...sztring-lista párok listájából\n", "data2 = [(\"a\", [1, 1, 2]), (\"b\", [3.0, 4.0, None])]\n", "df2 = pd.DataFrame.from_dict(dict(data2))\n", "print('String Listapárból: ')\n", "print(df2)\n", "print('-----------------')\n", "\n", "# ...szótárak listájából\n", "data3 = [{\"a\": 1, \"b\": 3}, {\"a\": 1, \"b\": 4}, {\"a\": 2}]\n", "df3 = pd.DataFrame(data3)\n", "print('Szótárak listájából: ')\n", "print(df3)\n", "print('-----------------')\n", "\n", "# ...és még számos egyéb módon\n" ], "execution_count": 5, "outputs": [ { "output_type": "stream", "text": [ "Szótárból: \n", " a b\n", "0 1 3.0\n", "1 1 4.0\n", "2 2 NaN\n", "-----------------\n", "String Listapárból: \n", " a b\n", "0 1 3.0\n", "1 1 4.0\n", "2 2 NaN\n", "-----------------\n", "Szótárak listájából: \n", " a b\n", "0 1 3.0\n", "1 1 4.0\n", "2 2 NaN\n", "-----------------\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "J3kwMblW8lYn", "colab_type": "code", "outputId": "24ce45aa-2dec-44a3-c532-29181702fb8c", "colab": { "base_uri": "https://localhost:8080/", "height": 70 } }, "source": [ "## egyszerű elem kimetszés\n", "def slices(s, *args): ## Kimetsző\n", " position = 0 ## Kezdő pozició\n", " for length in args:\n", " yield s[position:position + length] \n", "# a yield egy különleges függvény, amely időről időre értékeket állít elő, mint egy folytatható függvény, a meghívása egy generátort ad vissza\n", " \n", "print(list(slices('abcdefghijklmnopqrstuvwxyz0123456789', 2, 10, 50)))\n", "print('---------------------------')\n", "d,c,h = slices('LajosBélaAttilaFeri', 5, 4, 6)\n", "print(d,c,h)\n" ], "execution_count": 8, "outputs": [ { "output_type": "stream", "text": [ "['ab', 'abcdefghij', 'abcdefghijklmnopqrstuvwxyz0123456789']\n", "---------------------------\n", "Lajos Lajo LajosB\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "Kj3NjSMwLuOQ", "colab_type": "text" }, "source": [ "---\n", "\n", "## yield utasítás\n", "\n", "Ezt akkor használjuk, amikor egy generátor függvényt definiálunk és csak a függvény törzsében használjuk. \n", "\n", "A yield utasítás használata egy függvény definiálásánál elegendő ahhoz, hogy egy normál függvényből egy generátor függvényt készítsünk.\n", "\n", "Amikor a generátor függvényt meghívjuk, akkor visszatér egy iterátorral, amit generátor iterátornak, vagy másképp generátornak nevezünk. \n", "\n", "A generátor next() hívásának hatására a függvény törzse híváskor hajtódik végre, és addig ismétlődik, amíg egy kivételt nem vált ki.\n", "\n", "A yield utasítás végrehajtásakor, a generátor állapota fagyott lesz, és a kifejezés lista értékével tér vissza next() hívójához. \n", "\n", "\n", "---" ] }, { "cell_type": "code", "metadata": { "id": "Jwn39oZdI8L1", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 52 }, "outputId": "819809f6-f61c-488e-841c-178bf61b4214" }, "source": [ "## yield minta\n", "def fib(max):\n", " a, b = 0, 1 \n", " while a < max:\n", " yield a ## yield Generátort ad vissza, csak a meghívásakor töltődik fel \n", " a, b = b, a + b \n", "\n", "print('Fibonacci sor lista bejárása: ') \n", "for n in fib(200): ## Generátoros függvény meghívása és for ciklusban olvasása és kiírása \n", " print(n, end=' ') \n" ], "execution_count": 17, "outputs": [ { "output_type": "stream", "text": [ "Fibonacci sor lista bejárása: \n", "0 1 1 2 3 5 8 13 21 34 55 89 144 " ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "_kHeRA4T8lYp", "colab_type": "code", "outputId": "04a19c71-7c63-445f-84d1-dee29a568e98", "colab": { "base_uri": "https://localhost:8080/", "height": 52 } }, "source": [ "# minden DataFrame-hez és Series-hez tartozik index\n", "print(df1.index)\n", "# (alapértelmezés szerint az index 0-tól induló, 1-esével növekedő sorszám)\n", "\n", "# ...de természetesen mást is megadhatunk indexnek\n", "df4 = pd.DataFrame(data1, [\"xx\", \"yy\", \"zz\"])\n", "print(df4.index)\n", "\n", "#Int64Index([0, 1, 2], dtype='int64')\n", "#Index(['xx', 'yy', 'zz'], dtype='object')\n" ], "execution_count": 18, "outputs": [ { "output_type": "stream", "text": [ "RangeIndex(start=0, stop=3, step=1)\n", "Index(['xx', 'yy', 'zz'], dtype='object')\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "ImpoVp7v8lYs", "colab_type": "code", "outputId": "0f131df7-5f21-4778-e4a0-59cb1decc226", "colab": { "base_uri": "https://localhost:8080/", "height": 87 } }, "source": [ "\n", "\n", "# példák Series létrehozásra:\n", "se1 = pd.Series([2, 3, 4])\n", "se2 = pd.Series([2, 3, 4], [\"xx\", \"yy\", \"zz\"]) # a 2. argumentum az index\n", "\n", "# DataFrame-ből oszlopot [] operátorral lehet kiválasztani\n", "df1[\"a\"] # <= Series-t ad eredményül\n", "# ...illetve ha az oszlop neve érvényes azonosítónév, akkor . operátorral is\n", "df1.a # <= Series-t ad eredményül\n" ], "execution_count": 19, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 1\n", "1 1\n", "2 2\n", "Name: a, dtype: int64" ] }, "metadata": { "tags": [] }, "execution_count": 19 } ] }, { "cell_type": "code", "metadata": { "id": "Y0WfqCDk8lYv", "colab_type": "code", "outputId": "33bb0e79-e183-486a-b3b3-2ae46dbed343", "colab": { "base_uri": "https://localhost:8080/", "height": 106 } }, "source": [ "# DataFrame-ből sort a .iloc attribútumon keresztül lehet kiválasztani\n", "df1.iloc[0] # <= ez is Series-t ad eredményül\n", "df1.iloc[[1, 0]] # <= DataFrame-et ad eredményül, mivel 2 sort választottunk ki" ], "execution_count": 20, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
114.0
013.0
\n", "
" ], "text/plain": [ " a b\n", "1 1 4.0\n", "0 1 3.0" ] }, "metadata": { "tags": [] }, "execution_count": 20 } ] }, { "cell_type": "code", "metadata": { "id": "eQg3w0Bd8lYy", "colab_type": "code", "outputId": "9491448c-ba52-42a2-ab04-0be1cae49621", "colab": { "base_uri": "https://localhost:8080/", "height": 70 } }, "source": [ "# Series-ből elemet [] operátorral lehet kiválasztani\n", "print(se1[0])\n", "print(se2[\"xx\"])\n", "\n", "# a nyers adattartalmát a values attribútumon keresztül lehet elérni\n", "se1.values # <= numpy tömböt ad eredményül" ], "execution_count": 21, "outputs": [ { "output_type": "stream", "text": [ "2\n", "2\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/plain": [ "array([2, 3, 4])" ] }, "metadata": { "tags": [] }, "execution_count": 21 } ] }, { "cell_type": "code", "metadata": { "id": "9ADZnwSV8lY1", "colab_type": "code", "outputId": "feaf9ba5-6511-479a-8e07-42c4be8c5740", "colab": {} }, "source": [ "# Import\n", "#import pandas as pd\n", "#import numpy as np\n", "\n", "path = r'c:\\Users\\User\\Documents\\mintak\\jupiter\\kl\\aa_kl_2020\\fixlinefile.txt'\n", "\n", "# Using Pandas with a column specification\n", "col_specification = [(0, 9), (10, 18), (19, 27), (29, 36), (38, 45), (46, 100)]\n", "data = pd.read_fwf(path, colspecs=col_specification) ## Read a table of fixed-width formatted lines into DataFrame.\n", "#print(data.dtypes)\n", "#print(data.columns) ## Index(['ncalls', 'tottime', 'percall', 'cumtime', 'percall.1', 'filename:lineno(function)'], dtype='object')\n", "#print(data.index) ## RangeIndex(start=0, stop=10, step=1)\n", "\n", "print(data.describe()) ## adatframe info\n", "print('---------------------------')\n", "print(data['ncalls'].min())\n", "print(data['ncalls'].max())\n", "print('---------------------------')\n", "print(data['ncalls'].describe()) ## adatframe egy mezőről info\n", "print('---------------------------')\n", "\n", "\n", "print('---------------------------')\n", "print(data)\n", "print('---------------------------')\n", "\n", " \n", "## kiírás filebe\n", "data.to_csv('kimenet.csv', sep='|') ## separátorok lehetnek még: | \\t , ; ¤ @ ~\n", "\n", "## diagram\n", "data[['cumtime', 'percall']].plot(figsize=(10, 6), style=['-', '--'], lw=2)" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ " tottime apercall cumtime percall\n", "count 10.000000 10.000000 10.000000 10.000000\n", "mean 0.001100 0.000300 0.012400 0.006300\n", "std 0.002601 0.000949 0.005296 0.009166\n", "min 0.000000 0.000000 0.008000 0.000000\n", "25% 0.000000 0.000000 0.009000 0.000000\n", "50% 0.000000 0.000000 0.011000 0.000000\n", "75% 0.000000 0.000000 0.012500 0.010250\n", "max 0.008000 0.003000 0.022000 0.022000\n", "---------------------------\n", "1\n", "50\n", "---------------------------\n", "count 10\n", "unique 3\n", "top 50\n", "freq 5\n", "Name: ncalls, dtype: object\n", "---------------------------\n", "---------------------------\n", " ncalls tottime apercall cumtime percall \\\n", "0 1 0.000 0.000 0.022 0.022 \n", "1 1 0.000 0.000 0.022 0.022 \n", "2 354/52 0.000 0.000 0.013 0.000 \n", "3 1 0.000 0.000 0.011 0.011 \n", "4 50 0.000 0.000 0.011 0.000 \n", "5 50 0.000 0.000 0.011 0.000 \n", "6 50 0.000 0.000 0.009 0.000 \n", "7 50 0.000 0.000 0.009 0.000 \n", "8 50 0.008 0.000 0.008 0.000 \n", "9 1 0.003 0.003 0.008 0.008 \n", "\n", " filename:lineno(function) \n", "0 {built-in method builtins.exec} \n", "1 :5() \n", "2 {built-in method numpy.core._multiarray_umath.... \n", "3 :9() \n", "4 <__array_function__ internals>:2(histogram) \n", "5 histograms.py:680(histogram) \n", "6 <__array_function__ internals>:2(sort) \n", "7 fromnumeric.py:837(sort) \n", "8 {method 'sort' of 'numpy.ndarray' objects} \n", "9 :1(step) \n", "---------------------------\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "9eaoQqmu8lY3", "colab_type": "code", "outputId": "825a9323-b709-4a91-87a0-ea17e0f85a4a", "colab": {} }, "source": [ "# Az openair.csv fájl London légszennyezetttségéről tartalmaz adatokat.\n", "# Töltsük be a fájlt DataFrame-be!\n", "url = \"https://github.com/ipython-books/cookbook-2nd-data/blob/master/federer.csv?raw=true\"\n", "\n", "df = pd.read_csv(url)\n", "print(df.head(3)) ## három sor liíratása\n", "print('-------------')\n", "\n", "# Megjegyzések:\n", "# - a pandas.read_csv függvénynek rengeteg paramétere van,\n", "# hogy be tudja tölteni a valós életben előforduló CSV fájl változatokat\n", "# - a pandas képes kezelni a hiányzó adatokat\n", "# (ezek a táblában NaN értékként jelennek meg)\n", "\n", "# így tudunk összesítő információkat kérni a DataFrame-ről\n", "df.info()" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ " year tournament start date type surface draw \\\n", "0 1998 Basel, Switzerland 05.10.1998 WS Indoor: Hard Draw: 32 \n", "1 1998 Toulouse, France 28.09.1998 WS Indoor: Hard Draw: 32 \n", "2 1998 Toulouse, France 28.09.1998 WS Indoor: Hard Draw: 32 \n", "\n", " atp points atp ranking tournament prize money round ... \\\n", "0 1 396.0 $9,800 R32 ... \n", "1 59 878.0 $10,800 R32 ... \n", "2 59 878.0 $10,800 R16 ... \n", "\n", " player2 2nd serve return points total player2 break points converted won \\\n", "0 22.0 4.0 \n", "1 19.0 0.0 \n", "2 30.0 0.0 \n", "\n", " player2 break points converted total player2 return games played \\\n", "0 8.0 8.0 \n", "1 1.0 8.0 \n", "2 4.0 10.0 \n", "\n", " player2 total service points won player2 total service points total \\\n", "0 36.0 50.0 \n", "1 33.0 65.0 \n", "2 46.0 75.0 \n", "\n", " player2 total return points won player2 total return points total \\\n", "0 26.0 53.0 \n", "1 8.0 41.0 \n", "2 23.0 73.0 \n", "\n", " player2 total points won player2 total points total \n", "0 62.0 103.0 \n", "1 41.0 106.0 \n", "2 69.0 148.0 \n", "\n", "[3 rows x 70 columns]\n", "-------------\n", "\n", "RangeIndex: 1179 entries, 0 to 1178\n", "Data columns (total 70 columns):\n", "year 1179 non-null int64\n", "tournament 1179 non-null object\n", "start date 1179 non-null object\n", "type 1179 non-null object\n", "surface 1179 non-null object\n", "draw 1179 non-null object\n", "atp points 1139 non-null object\n", "atp ranking 1177 non-null float64\n", "tournament prize money 1170 non-null object\n", "round 1179 non-null object\n", "opponent 1179 non-null object\n", "ranking 1105 non-null object\n", "score 1179 non-null object\n", "stats link 1179 non-null object\n", "tournament.1 1179 non-null object\n", "tournament round 1179 non-null object\n", "time 1179 non-null int64\n", "winner 1179 non-null object\n", "player1 name 1179 non-null object\n", "player1 nationality 1179 non-null object\n", "player1 aces 1027 non-null float64\n", "player1 double faults 1027 non-null float64\n", "player1 1st serves in 1027 non-null float64\n", "player1 1st serves total 1027 non-null float64\n", "player1 1st serve points won 1027 non-null float64\n", "player1 1st serve points total 1027 non-null float64\n", "player1 2nd serve points won 1027 non-null float64\n", "player1 2nd serve points total 1027 non-null float64\n", "player1 break points won 1027 non-null float64\n", "player1 break points total 1027 non-null float64\n", "player1 service games played 1027 non-null float64\n", "player1 1st serve return points won 1027 non-null float64\n", "player1 1st serve return points total 1027 non-null float64\n", "player1 2nd serve return points won 1027 non-null float64\n", "player1 2nd serve return points total 1027 non-null float64\n", "player1 break points converted won 1027 non-null float64\n", "player1 break points converted total 1027 non-null float64\n", "player1 return games played 1027 non-null float64\n", "player1 total service points won 1027 non-null float64\n", "player1 total service points total 1027 non-null float64\n", "player1 total return points won 1027 non-null float64\n", "player1 total return points total 1027 non-null float64\n", "player1 total points won 1027 non-null float64\n", "player1 total points total 1027 non-null float64\n", "player2 name 1179 non-null object\n", "player2 nationality 1110 non-null object\n", "player2 aces 1027 non-null float64\n", "player2 double faults 1027 non-null float64\n", "player2 1st serves in 1027 non-null float64\n", "player2 1st serves total 1027 non-null float64\n", "player2 1st serve points won 1027 non-null float64\n", "player2 1st serve points total 1027 non-null float64\n", "player2 2nd serve points won 1027 non-null float64\n", "player2 2nd serve points total 1027 non-null float64\n", "player2 break points won 1027 non-null float64\n", "player2 break points total 1027 non-null float64\n", "player2 service games played 1027 non-null float64\n", "player2 1st serve return points won 1027 non-null float64\n", "player2 1st serve return points total 1027 non-null float64\n", "player2 2nd serve return points won 1027 non-null float64\n", "player2 2nd serve return points total 1027 non-null float64\n", "player2 break points converted won 1027 non-null float64\n", "player2 break points converted total 1027 non-null float64\n", "player2 return games played 1027 non-null float64\n", "player2 total service points won 1027 non-null float64\n", "player2 total service points total 1027 non-null float64\n", "player2 total return points won 1027 non-null float64\n", "player2 total return points total 1027 non-null float64\n", "player2 total points won 1027 non-null float64\n", "player2 total points total 1027 non-null float64\n", "dtypes: float64(49), int64(2), object(19)\n", "memory usage: 644.9+ KB\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "otb2kzs_8lY6", "colab_type": "code", "outputId": "7481a506-bfc1-484d-f9be-c3ce727bbe95", "colab": {} }, "source": [ "# írassuk ki az oszlopok minimális, maximális és és átlagos értékét\n", "data = []\n", "for c in df.columns[22:30]: # kihagyjuk a dátum oszlopot, mivel ott nincs értelme az átlagnak\n", " se = df[c]\n", " data.append({\"column\": c, \"min\": se.min(), \"max\": se.max(), \"mean\": se.mean()})\n", " # (megjegyzés: a pandas a NaN értékeket nem veszi figyelembe a statisztikakészítéskor)\n", "stats = pd.DataFrame(data)\n", "stats\n", "\n", "# megjegyzés: a statisztikákat a describe() függvény segítségével is lekérhettük volna\n", "print(df[\"player1 aces\"].describe()) # <= egy oszlop statisztikáit adja vissza (Series-be csomagolva)\n", "\n", "df.describe() # <= az összes oszlop statisztikáit visszaadja (DataFrame-be csomagolva)\n", "\n", "df[\"player1 aces\"][:10].plot()" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "count 1027.000000\n", "mean 7.658228\n", "std 4.791261\n", "min 0.000000\n", "25% 4.000000\n", "50% 7.000000\n", "75% 10.000000\n", "max 50.000000\n", "Name: player1 aces, dtype: float64\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 50 }, { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "1-HbhXpw8lY8", "colab_type": "code", "outputId": "1f7d50d4-3afa-452c-d869-7b8cbd29064c", "colab": {} }, "source": [ "player = 'Roger Federer'\n", "df['win'] = df['winner'] == player\n", "df['win'].tail()" ], "execution_count": 0, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "1174 False\n", "1175 True\n", "1176 True\n", "1177 True\n", "1178 False\n", "Name: win, dtype: bool" ] }, "metadata": { "tags": [] }, "execution_count": 52 } ] }, { "cell_type": "code", "metadata": { "id": "K553OH798lY_", "colab_type": "code", "outputId": "8caae763-9391-4bf8-9fee-ff63bee9d490", "colab": {} }, "source": [ "won = 100 * df['win'].mean()\n", "print(f\"{player} {won:.0f}% -ban győzött a mérkőzései során.\")" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "Roger Federer 82% -ban győzött a mérkőzései során.\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "B_RZMXwJ8lZB", "colab_type": "code", "outputId": "29377823-4141-42c4-ef72-9b2da5d3e2b5", "colab": {} }, "source": [ "date = df['start date']\n", "print(date)\n", "print('------------------')\n", "df['dblfaults'] = (df['player1 double faults'] / df['player1 total points total'])\n", "print(df['dblfaults'].tail())\n", "print('------------------')\n", "print(df['dblfaults'].describe())" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "0 05.10.1998\n", "1 28.09.1998\n", "2 28.09.1998\n", "3 28.09.1998\n", "4 24.08.1998\n", " ... \n", "1174 16.01.2012\n", "1175 02.01.2012\n", "1176 02.01.2012\n", "1177 02.01.2012\n", "1178 02.01.2012\n", "Name: start date, Length: 1179, dtype: object\n", "------------------\n", "1174 0.018116\n", "1175 0.000000\n", "1176 0.000000\n", "1177 0.011561\n", "1178 NaN\n", "Name: dblfaults, dtype: float64\n", "------------------\n", "count 1027.000000\n", "mean 0.012129\n", "std 0.010797\n", "min 0.000000\n", "25% 0.004444\n", "50% 0.010000\n", "75% 0.018108\n", "max 0.060606\n", "Name: dblfaults, dtype: float64\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "GSiU4JSo8lZD", "colab_type": "code", "outputId": "6d5341af-0108-4362-9627-9d83d9a5fb5f", "colab": {} }, "source": [ "## milyen tipusú pályákon volt eredményes\n", "df.groupby('surface')['win'].mean()" ], "execution_count": 0, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "surface\n", "Indoor: Carpet 0.736842\n", "Indoor: Clay 0.833333\n", "Indoor: Hard 0.836283\n", "Outdoor: Clay 0.779116\n", "Outdoor: Grass 0.871429\n", "Outdoor: Hard 0.842324\n", "Name: win, dtype: float64" ] }, "metadata": { "tags": [] }, "execution_count": 67 } ] }, { "cell_type": "code", "metadata": { "id": "IiKhm0Xo8lZG", "colab_type": "code", "outputId": "b9c36ab5-2132-4d56-b93d-cd113c2b78b7", "colab": {} }, "source": [ "from datetime import datetime\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "gb = df.groupby('year')\n", "\n", "\n", "fig, ax = plt.subplots(1, 1)\n", "ax.plot_date(date, df['dblfaults'], alpha=.25, lw=0)\n", "ax.plot_date(gb['start date'].max(), gb['dblfaults'].mean(), '-', lw=3)\n", "ax.set_xlabel('Year')\n", "ax.set_ylabel('Double faults per match')\n", "ax.set_ylim(0)" ], "execution_count": 0, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(0, 0.06363636363636364)" ] }, "metadata": { "tags": [] }, "execution_count": 74 }, { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "vYWz-H2u8lZI", "colab_type": "code", "colab": {} }, "source": [ "" ], "execution_count": 0, "outputs": [] } ] }