{
"cells": [
{
"cell_type": "code",
"execution_count": 176,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.558466Z",
"start_time": "2019-04-29T13:16:53.554327Z"
}
},
"outputs": [],
"source": [
"# Common imports\n",
"import numpy as np\n",
"import os\n",
"\n",
"from math import log\n",
"import pandas as pd\n",
"import operator\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 载入数据并查看"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.579311Z",
"start_time": "2019-04-29T13:16:53.560121Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" department | \n",
" status | \n",
" age | \n",
" salary | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Sales | \n",
" Senior | \n",
" 3135 | \n",
" 46K50K | \n",
"
\n",
" \n",
" | 1 | \n",
" Sales | \n",
" Junior | \n",
" 2630 | \n",
" 26K30K | \n",
"
\n",
" \n",
" | 2 | \n",
" Sales | \n",
" Junior | \n",
" 3135 | \n",
" 31K35K | \n",
"
\n",
" \n",
" | 3 | \n",
" Systems | \n",
" Junior | \n",
" 2125 | \n",
" 46K50K | \n",
"
\n",
" \n",
" | 4 | \n",
" Systems | \n",
" Senior | \n",
" 3135 | \n",
" 66K70K | \n",
"
\n",
" \n",
" | 5 | \n",
" Systems | \n",
" Junior | \n",
" 2630 | \n",
" 46K50K | \n",
"
\n",
" \n",
" | 6 | \n",
" Systems | \n",
" Senior | \n",
" 4145 | \n",
" 66K70K | \n",
"
\n",
" \n",
" | 7 | \n",
" Marketing | \n",
" Senior | \n",
" 3640 | \n",
" 46K50K | \n",
"
\n",
" \n",
" | 8 | \n",
" Marketing | \n",
" Junior | \n",
" 3135 | \n",
" 41K45K | \n",
"
\n",
" \n",
" | 9 | \n",
" Secretary | \n",
" Senior | \n",
" 4650 | \n",
" 36K40K | \n",
"
\n",
" \n",
" | 10 | \n",
" Secretary | \n",
" Junior | \n",
" 2630 | \n",
" 26K30K | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" department status age salary\n",
"0 Sales Senior 3135 46K50K\n",
"1 Sales Junior 2630 26K30K\n",
"2 Sales Junior 3135 31K35K\n",
"3 Systems Junior 2125 46K50K\n",
"4 Systems Senior 3135 66K70K\n",
"5 Systems Junior 2630 46K50K\n",
"6 Systems Senior 4145 66K70K\n",
"7 Marketing Senior 3640 46K50K\n",
"8 Marketing Junior 3135 41K45K\n",
"9 Secretary Senior 4650 36K40K\n",
"10 Secretary Junior 2630 26K30K"
]
},
"execution_count": 177,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"csv_path=os.path.join(\"datas\", \"salary\",\"分类贝叶斯.csv\")\n",
"iris=pd.read_csv(csv_path,sep=',',header=0,encoding=\"ISO-8859-1\")\n",
"iris"
]
},
{
"cell_type": "code",
"execution_count": 178,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.593540Z",
"start_time": "2019-04-29T13:16:53.581264Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 178,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris.describe"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.606124Z",
"start_time": "2019-04-29T13:16:53.595058Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['department', 'status', 'age', 'salary'], dtype='object')"
]
},
"execution_count": 179,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"spec=iris.columns\n",
"spec"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.619432Z",
"start_time": "2019-04-29T13:16:53.607372Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Junior 6\n",
"Senior 5\n",
"Name: status, dtype: int64"
]
},
"execution_count": 180,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test=pd.Series(iris['status'].value_counts())\n",
"test"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.632849Z",
"start_time": "2019-04-29T13:16:53.620775Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Junior 6\n",
"Senior 5\n",
"Name: status, dtype: int64\n",
"Index(['Junior', 'Senior'], dtype='object')\n",
"5\n"
]
}
],
"source": [
"print(iris['status'].value_counts(),iris['status'].value_counts().index,iris['status'].value_counts()[1],sep='\\n')"
]
},
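{
"cell_type": "markdown",
"metadata": {},
"source": [
"The counts above are exactly what the class priors are built from. A minimal sketch (the name `priors` is ours, not from the original notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Class priors P(Junior) and P(Senior) as relative frequencies.\n",
"counts = iris['status'].value_counts()\n",
"priors = counts / counts.sum()\n",
"priors"
]
},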
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T08:47:45.783848Z",
"start_time": "2019-04-29T08:47:45.781773Z"
}
},
"source": [
"# 查看公式"
]
},
{
"attachments": {
"image.png": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAABbCAYAAADQplYeAAAVZklEQVR4Ae2dTcxfwxfH7/PPf0ks2oWXjbbUiqBbG1oJoYKgItHaCRbES9SKnUgrsamEVUvCgoUoIYpY2PYhrLyvWhIstNaefz7X//xynnlm5s7c95czyfPc+5s7c86Z77n33JlzZ+asbWxsbBSWDAFDwBAwBDpHYG1tbe0/nXMxBoaAIWAIGAIrBMzorqCwE0PAEDAEukfAjG73GBsHQ8AQMARWCJjRXUFhJ4aAIWAIdI+AGd3uMTYOhoAhYAisEDCju4LCTuaMwFdffVVcddVVBUdLhsCQCJjRHRL9ifB++eWXi7W1taQ/yqYmykr5Dz74IIn+vn37ir///rv49ddfi127dgXrQK/tFOMp/JDtwIEDQeOu21xHPuqn6EJwTeWRIxc4XHHFFV45Tp48WbIEh/vvvz+IA4VyeKa2YxLlmKdryRBIQeDgwYPM6S7/ONfp/PnzG3v37i2v7dy5c+Ps2bP68qbz9fX1jW3btm3EaAgfOUIbHm4SWlLu5MmTbpHyN+V27969wTE3HT16dNVuzt0ET+HPMSQr9QSnKoxcHu7vkC6EPnKk8ACP7du3b9GFy4/fKTisra2tsACHc+fO+UiVeci6b9++Us5goZldKF8KM2uTNadDBLRx8RkfDC0PuhgenyhCI2QcNY0UQyZGBiMeM6h1jC51oIsc7gvCbZuWu6osdSlTJbPLQ/8WHJHN1YWWJfYCgAZG8v3339ekvedgkYrDrl27VmX/+ecfLz2deejQoY3Tp0/rrNmem9GdrWq7aZj0dGLGQnpglHGTGDHXSITKaaMbeuAxHDF5hHau0ZW2+oya0HSPUif0QnHLNzG8wivUdq0H2u4m8ujhVumCesIrpawun2LMRS7asQTDa0ZXNG7HSgR074kHOpTkYcdY6ST5qQZJerCu4ZUhs1yPyaL55xhdDAB8Y71ETVvO4XHfffd53SBSxj2KQUttB/VTdCF40w4Xc7lWZRTFMAsWruyh34JDzLXg1gUHet3IltI7dutP5bcZ3aloagRy8uCKAXQfYhFPGwOMoyQeQgyZGEzJrzpKPeErRx5M/nKMIrRSfLoYc/iEepAxmWn/Cy+8ECuy5ZpglsOvShdCk3a4mAum5J85c2aLPJIBDvhbBYucXmhdHHBLgEMOL5F3KkczulPR1AjklB6Z+xBr0aQMDyrnkqRnxTE3aZrQFSMQk8PHI9XoCr86svr4VuVJj512pfIUGUMYyHVXD8iSqgtNI1WuqrbGrmsjD7+59nYxujZlrHz12L8YAkz/+eijj8oiN9xwQ3HJJZdsKc781xdffLHM37t3b/Hwww+X5+R/+OGH5fk111yzpV5VBnSgp9Off/5ZvPvuu145dLncc93Oe++9N7d6rfIXXHBBceutt5Z1v/zyy3IqXIyQltGni5AeoJmqC3h8/PHHKzHuueee1XlXJ+Bwyy23lNPQUnDoSo4+6JrR7QPlifP44Ycfiq+//rpshc9wHjp0qLj++usLjOHBgweLTz/9tOAhIp05c6bM55zFCbkJOkeOHCm2bdu2qeorr7yy6XcbP7744ovis88+K3bu3Fns2bOnDZJJNASXn3/+uTh9+nS0TpUuQnqAaKouBAfq9IlFDg5RkEZ+0YzuyBU0BvH0w/r0009vmRT/xhtvlMaWrZlPnDixSeR33nmn/I3RvOyyyzZdS/1x3XXXFc8999ym4vCUBQmbLjT48d1335W1fT3IBmQrq4KLvFQEr1ClKl3w0vPpAXpCu0oX4CDbbIPFpZdeGhKn1XyNAyMZkaFVJiMgZkZ3BEoYuwjysDLMP3/+fPkw8EDoP9fY0iaGqRiJNpLPzfD4449XDsdzeH/zzTdlcV9vPodOl2WrdOHTA/Lk6EJwoN6YsegS5y5pm9HtEt0Z0GbJJz42Ej0RcRvkNu2iiy4qLr744txqq/KvvfZaOfRfZRRFwXD8rbfe0lm1z7VRkmFubWINKvKSQhZf6kMX8D579uyK/e7du1fnfZ7EcOhTji54mdHtAtUZ0fztt9+Kv/76q2xRXx+XXPhwI/CRbn19vTh69Oimy+TzgWjKiZcRL6WqNAZdVMnY5HoqDk14jKGuGd0xaGHEMnz++eflh7AqP2BXTaB3hxvh+PHjBb5d183Ax7tnnnkm2DtMlYsevPicxbebWrevcn3oAhy0D/f777/vq3mL4WNGdzGqzm8oQ02ZKnbttdcWV155ZT6R/9egt0xPLSfB/8EHHyweffTR4vbbby+rymwGTYcZB7gfmibxX2qfZlOaKfV1DzZUvk9dCA7I0icWKTiE8JlSvhndKWmrZ1n19KQ6/lzde6wj+mOPPVb2Pp966qlN1enxuqkNN8MDDzxQTpHKnSeKQYxt5+jKGvsdwrlPXYAD22aSwEL7eGOyg0PVdo6x+vpaCAddZqrnZnSnqrke5JbhLKzq+nOlHm6AnJkM7LVK+WPHjnlb6lsw0dTNwKIPetW5H+ief/754vDhw6X7wytsRSbtBB+S4OVW6VMX4PDII4+UIoDF22+/7Yrj/Q0Ozz77bCs4sCCDfYNnmWJL8+zachHQy1NDy01T0NH7ALC0NCXJEtTQHg/QkD0EZGmwHEM8UpcB026hFeOPDIJRrJxuP3tF+JK0N4Sz8EGuUBkfXTdPyxLCSerAU/ZegG9scxwpW4WDbPkIDr7NcJCJTW9oY2xfCJFxikdeItbTneWrtHmjfFO06lCl18QEexL+YYagocQ1IkOwAIPEqrNYeR8d6uZGTdB0cImwwIC0f//+cpjNxzw3wePyyy8vV8uJv9ktw+8LL7yw2LFjR3lJPtS55cRvGlqU0ZUumHMdSuBw6tSpFRZ33HGH180gOLz00ksrv7uPposD9N307bfflnO/Qzi45Sf7e4pvC5O5GwR0j0p6e/qYsxOWllB6pbFemmzEovnJuVsPOeVa6OjWSe3parljMlX1FDWd2Ln0Pl1s+9BFTm8yhEWbONguY7E7xa4ZApkIyEPb1kOayb50R6Rs7ZhLt2l5huS8OMCnrzS0LnztBAfbT9eHzELy6vSKFgJN7WZKj87tgdYmmFlxjDqV3mzfmGhd5PR2MyHPKi6x0sYiT5bwGYVxidT26eLLSYlKSpkcHxtlc8rjbwtFhZUNUfALtrlqCbpVbZeotYAckxE60Mtt99T8Wfh22cSE+bp86V9CEh81OmYnNjfhq2X3ti62qXR56d+ii3Pnzm3ZSEiXa+scHG6++ebymQEH30Y2PJ/sK6EXZrTFf3R0Moy0t6gMVXxDJHmTcy3lbY5fK3WYxTBV/Hm+IasM26QMsqSmlF6Rbpvw4BiLZgBd2ijl9ddeoZeCU2o7xlhOMEjVc1ttSNFpW7z0venTNXykjL4H2uKfSgdMUiMBp9LU5aSNggFH3ywIyvnyNa25nJcvgKaN0cYN8NwkQxkAjxkk6KTcgPLQQq/qwdW8Xbliv1MfUE1fbqxYG8Wwuh9NtCy0KXZdl53queAWw6rtt
qXqtE2+cq+6L1J07Oa1yTeHFrrgAxa68E3jyqEVKgsOGHfa7LoPiARM/lISRre2e0G67LJOnbX5N910k2SvjnrKEEMpVta4iaHFQw89FJ1yQh2G4LJJMxufhLaxE/rwZrJ7V0mGaZo+S1JZSeVLbA4NBkzF8a2qog5tuu2228rhWJsuEZ88Q+WB208//VRGTGhj+e5Q7ajiKwsa9BQo7mGW2dJ+cBg6IcOPP/5Y6uL111/vRBxw+OOPP8qpg7rNYHH11VeXWHTCeKxEm75heGtX9WL1MMPtzepeT0wWTcPXow7VlR5O6LovX+pwTEnSy9XHUDvdfB99wWRJPQAfDlPOk1EN90SKzqfc1pjs4CCLLJbiQojhUb4HYgWqrnEziaEJ3VhiQCjnDqkwagyl3XzNV9+8dYbd8M+N0JprdKUdgoUcxf3BMXcoLbilGn6NmZ0Pj4B0EuQeGF6iYSTQOMw12GQOshjdRu4FcS3E4iixyTTrt0kM9fXwghVHrDnXw6+yoPqnV+Mw7A4Ny1WVTafwY014lwmZ3HAy8CMgIxuAsGnIm2++mbUBuKzg6SIWWJdYGO1/ozTI7myhvRSWgBOzFiTA5az3UshUZm2jC6ByY4WMJj5JX4RYZORaVZRYzYM6Y76B3X1ekZcXCkEa60wJYpkkUWJzd7zK1L8V7wABltf+8ssvZdyz0NLfDtiOjqTh4FdJbaOrt5rT+28Km1iEWMronZVC4VGGikoqbcg5YiR9UWsxvHV7q+DCKKEqQmyOnFa2ewTQF3prugdx95J2y8Fw8ONb2+hqo5kbIRZRJMBeLCKBuC8oH+pN+5s1TG7IzVA3cq30kgQrt1UpizRCizi2b9/e6oIRV7Yl/xZ9MVLxbeyyFGwkoi844C6z9C8CtY2u3Fi5EWJhi9sAo12VZPclyvl601X1h7juczMgR9uRa6HJ7lY6Im/OOVN4cv3jQ+A5NZ6sPsQlpKdQMupjetSSksbhxhtvLJvOtNCl4eDTeS2jK4BCsOkO76Eosa5hDrkgfI0aMk9/+NNyMNysG7mWFxR4WBo/AjrkDDpjOTjfItzoF+NvSTMJfTjwMW1pOPhQ/K8vsypPAzrmj1tV7Wj7OsN9iVrLhHDZF1b4cI0FJKk9zCYhy4WnHYdBAF/+XXfdFV0IM4xk/XIFh7vvvrv45JNPyoVN/XIfJ7daPV1ZaRPzxzZtLr4w8WlCS/t3m9Luoj69/1jUWnhyAzYNKdOF7EazPQR4oeK6wdWzZBcOOPz+++8rHFhJaulfBLKNLkMmmSrWxtfZWJRY7cfV/t2xKQ9MQlFreTHplBO5lhGFJUPAEJgXAtlGV08Vq+vPdXuxIUglOivXc+erYgjbitAakk/yY1FrfYsmciPXhnC22QuiATsaAhNCIGcJG2VlWR9LXUNLf1Nopiwhdvnl7Lnw5JNPltECUmRxy+QuA44t8dXLmGV5MMdYHZFHMGqCs9BKOWr57LxYLXE3LIbFYk7Lh3k1ZPd0xbUQW/qb8s7Zs2dPAQ1SzF+rp2DxYYreXSzJF2Omqfg+WOF7lU3P9UbjMZqxa0yBeeKJJ4LzMUOLJlLcDODSFOeY7O61nClnVnaj9nQ9wy4PO+aazyllG12MRRuJPRFY8EDCkGMsfQmjxVLatiK0yp4G8AoN231yuHli3HkR1IlaCz3qxuYt4seewqIQFxv7bQgYAhEEUoadoSEyw646O38JT9mdK7bLmJSVo2wl6Rvy5bgfhJ7vWOVeCMngtiOGm5bfrYdMtsuYTzPt5GkXmdaDe557P1Fe6ohryKWpf2sXk+hbX9fn0NP020HCqPSNQGmK+2bq8hMDJjere32I31VGtw+Z5KHtg9dSeci9h3HjXJJ+WfpeiFJOjtJ50DS4puloA6qNrdCQo9CS8tqfL/RSZBJ6dhwXAqMwuvKG50YaSxra6OqHayyYzFEOebFh4NyXvtyXXIsZSaGhjaPGStMRQxqjJ7qPjSAx7rHrmr+djwsBjG62Tzfiqah1Cd+uRImtRSCjkvhhQ9FZM0h1WlRHie2U0cKJywdcvU+CQKK/OVSFmSJ0FPtg+JLc33q+toV08iG1oLyxvAfoXbrDszZlk16GHEM9E3gO2dOlx4WMMfnaxGXJtLjfwDrU8xRd+PQhPdhQXRdXTSt0DwrNFN1LWXM1uEiP+zevlsF7uvJ+Y3oXCyDamMYlNPWRaTrr6+vl7k99TsPSMlSdsxvVq6++Wpw9ezbYc6qiYdfTEEjZtCm2CvLw4cPlnrmx6YJaEj31UfL1znOMwljVyGyVUK9Z6nGkB00kFjZSQhZL00FgNEYXyLqOECt7RoxxGhZTx8YUJXY6t3A9SWWDbWr7Nm3SRtl9SUvUEzc/JonM19ZltMGUCNLHjh3TRaLnsmKTCCxzjRwdBWCqF8fdGW9POvlA4RsqtsfFKE0FARnuh4bncp37hXOdxC1Rxx0mrgU58kHswIED0eCsmrc+1/d0HVk0LTvvB4HyPdEPq+G5yENkN+fwuhhaAj0tyzWoyCYG0Xev4G+V6ym+V7etmrfQ4ejj5db1/W4qj4+m5XWHAEZ3VO6FLkcLsnzZN5Tskq/RHh8CVaGmWP2I7T1x4sQW4SViSt1tTfl24dsEqUlIJ5kZIbJtEdoyRoXAYoyuRWcd1X03qDBinEKhpnzGFoH52JUSZqqqcb6PatTRH9aqaNj16SKwGKNr0Vmne5O2Kbn+QNZk741QmKkUWbsI6QTfNl4IKfJbmWYILMboAtPSo7M2u1XmUXvoUFM6pBOLKtyUu9cyIZ14AViaDgKLMLr0bvSqoyVGZ53OLdmtpDJtsK5Ptql0FtKpKYLTr78Ioythb2QZ8BKjs07/Vm3eAvQvH1SbhpqKhZkKSQp/FjTI4geZuysfwqReyl7LUlb33CXPjuNGYBFGFxVIdNYjR46sbvpxq8akaxuBpqGmMJL4gesmFkC4IchDsxly3QzI1ES2um2yevkILMLocmMzBWjJ0Vnzb4351RDXAi2rO3VQ6vESz/lwxYrDUHnfbIbUyNHQpGyTNs1P0+Nu0SKM7rhVYNL1gYB2LeQs33VlSw0zpethcIkSwj4NvtTEzSA7pTVpk08my+sOATO63WFrlEeEQGiaVq6IesvHWJgp6Mo3BAwuibBOuSklpBM0x7ifSG5bF1O+uwVvRtkQGBYBvTeBXnIr53U3ApelvKF9G2g1y3qFjz66dapklLpuPXjI9o512zGsdpbJnRfLGk1fzBvGGmoItIQA0w5ZustcW/fjWEssKskw53f//v1l0NbQKrpKIlagVwTWSGZ0e8XcmM0EAeZ+M6QnsQ80boc+E66LO++8s2B5+xD8+2zrnHhhc82nOyeNWlt6Q0DC8DBfd4hNxHVIp74Nfm8gz5SRGd2ZKtaa1T0CTEU8depUwSbiuBv6SjIb4vjx4wUyWJoWAuZemJa+TNoRIiCuhh07dhTvvfdewRSwrhLGHXeCuRS6QrhbuuZe6BZfo74QBBjedx1qCigtpNM8bijr6c5Dj9YKQ8AQmAAC1tOdgJJMREPAEJgX
AvYhbV76tNYYAobAyBEwoztyBZl4hoAhMC8EzOjOS5/WGkPAEBg5AmZ0R64gE88QMATmhYAZ3Xnp01pjCBgCI0fAjO7IFWTiGQKGwLwQMKM7L31aawwBQ2DkCJjRHbmCTDxDwBCYFwJmdOelT2uNIWAIjBwBM7ojV5CJZwgYAvNCwIzuvPRprTEEDIGRI2BGd+QKMvEMAUNgXgiY0Z2XPq01hoAhMHIEzOiOXEEmniFgCBgChoAhYAgYAoZATQT+B8zqtWWLMhD1AAAAAElFTkSuQmCC"
}
},
"cell_type": "markdown",
"metadata": {},
"source": [
"## "
]
},
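{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference (assuming the screenshot above shows the standard form), the naive Bayes rule with the usual conditional-independence assumption is\n",
"\n",
"$$P(C_i \\mid X) = \\frac{P(X \\mid C_i)\\,P(C_i)}{P(X)}, \\qquad P(X \\mid C_i) = \\prod_{k=1}^{n} P(x_k \\mid C_i).$$\n",
"\n",
"$P(X)$ is the same for every class, so comparing $P(X \\mid C_i)\\,P(C_i)$ across classes is enough."
]
},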
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 理出计划 \n",
"输入x={department:key0,age:key1,salary:key2}\n",
"输出p(junier|x)和p(senier|y)\n"
]
},
{
"cell_type": "code",
"execution_count": 208,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T14:20:48.953940Z",
"start_time": "2019-04-29T14:20:48.947069Z"
}
},
"outputs": [],
"source": [
"x=pd.Series({'department':'Marketing','age':2125,'salary':'26K30K'})"
]
},
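{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of the computation planned above, assuming plain relative-frequency estimates (the function name `naive_bayes_posterior` is ours; no Laplace smoothing, so an unseen feature value drives a class score to 0):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def naive_bayes_posterior(df, query, label='status'):\n",
"    \"\"\"Unnormalized naive Bayes scores P(query|c) * P(c) for each class c.\"\"\"\n",
"    n = len(df)\n",
"    scores = {}\n",
"    for c in df[label].unique():\n",
"        subset = df[df[label] == c]\n",
"        p = len(subset) / n  # prior P(c)\n",
"        for feat in query.index:\n",
"            # conditional relative frequency P(feat = query[feat] | c)\n",
"            p *= (subset[feat] == query[feat]).sum() / len(subset)\n",
"        scores[c] = p\n",
"    return scores\n",
"\n",
"scores = naive_bayes_posterior(iris, x)\n",
"total = sum(scores.values())\n",
"{c: (s / total if total else 0.0) for c, s in scores.items()}"
]
},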
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 必要知识"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### pandas\n",
" 选择行列:https://blog.csdn.net/qq_38328378/article/details/81166518\n",
" 统计数量与频率iris['Species'].value_counts()"
]
},
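{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick illustration of the selection idiom referenced above: a boolean mask picks rows, then a column pick plus `value_counts()` gives the conditional counts the naive Bayes sketch relies on."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rows where status == 'Junior', then count departments within that subset.\n",
"juniors = iris[iris['status'] == 'Junior']\n",
"juniors['department'].value_counts()"
]
},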
{
"cell_type": "code",
"execution_count": 183,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.697647Z",
"start_time": "2019-04-29T13:16:53.644766Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on DataFrame in module pandas.core.frame object:\n",
"\n",
"class DataFrame(pandas.core.generic.NDFrame)\n",
" | Two-dimensional size-mutable, potentially heterogeneous tabular data\n",
" | structure with labeled axes (rows and columns). Arithmetic operations\n",
" | align on both row and column labels. Can be thought of as a dict-like\n",
" | container for Series objects. The primary pandas data structure\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | data : numpy ndarray (structured or homogeneous), dict, or DataFrame\n",
" | Dict can contain Series, arrays, constants, or list-like objects\n",
" | index : Index or array-like\n",
" | Index to use for resulting frame. Will default to np.arange(n) if\n",
" | no indexing information part of input data and no index provided\n",
" | columns : Index or array-like\n",
" | Column labels to use for resulting frame. Will default to\n",
" | np.arange(n) if no column labels are provided\n",
" | dtype : dtype, default None\n",
" | Data type to force. Only a single dtype is allowed. If None, infer\n",
" | copy : boolean, default False\n",
" | Copy data from inputs. Only affects DataFrame / 2d ndarray input\n",
" | \n",
" | Examples\n",
" | --------\n",
" | Constructing DataFrame from a dictionary.\n",
" | \n",
" | >>> d = {'col1': [1, 2], 'col2': [3, 4]}\n",
" | >>> df = pd.DataFrame(data=d)\n",
" | >>> df\n",
" | col1 col2\n",
" | 0 1 3\n",
" | 1 2 4\n",
" | \n",
" | Notice that the inferred dtype is int64.\n",
" | \n",
" | >>> df.dtypes\n",
" | col1 int64\n",
" | col2 int64\n",
" | dtype: object\n",
" | \n",
" | To enforce a single dtype:\n",
" | \n",
" | >>> df = pd.DataFrame(data=d, dtype=np.int8)\n",
" | >>> df.dtypes\n",
" | col1 int8\n",
" | col2 int8\n",
" | dtype: object\n",
" | \n",
" | Constructing DataFrame from numpy ndarray:\n",
" | \n",
" | >>> df2 = pd.DataFrame(np.random.randint(low=0, high=10, size=(5, 5)),\n",
" | ... columns=['a', 'b', 'c', 'd', 'e'])\n",
" | >>> df2\n",
" | a b c d e\n",
" | 0 2 8 8 3 4\n",
" | 1 4 2 9 0 9\n",
" | 2 1 0 7 8 0\n",
" | 3 5 1 7 1 3\n",
" | 4 6 0 2 4 2\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.from_records : constructor from tuples, also record arrays\n",
" | DataFrame.from_dict : from dicts of Series, arrays, or dicts\n",
" | DataFrame.from_items : from sequence of (key, value) pairs\n",
" | pandas.read_csv, pandas.read_table, pandas.read_clipboard\n",
" | \n",
" | Method resolution order:\n",
" | DataFrame\n",
" | pandas.core.generic.NDFrame\n",
" | pandas.core.base.PandasObject\n",
" | pandas.core.base.StringMixin\n",
" | pandas.core.accessor.DirNamesMixin\n",
" | pandas.core.base.SelectionMixin\n",
" | builtins.object\n",
" | \n",
" | Methods defined here:\n",
" | \n",
" | __add__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __add__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __and__(self, other, axis='columns', level=None, fill_value=None)\n",
" | Binary operator __and__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __div__ = __truediv__(self, other, axis=None, level=None, fill_value=None)\n",
" | \n",
" | __eq__(self, other)\n",
" | Wrapper for comparison method __eq__\n",
" | \n",
" | __floordiv__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __floordiv__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __ge__(self, other)\n",
" | Wrapper for comparison method __ge__\n",
" | \n",
" | __getitem__(self, key)\n",
" | \n",
" | __gt__(self, other)\n",
" | Wrapper for comparison method __gt__\n",
" | \n",
" | __iadd__ = f(self, other)\n",
" | \n",
" | __iand__ = f(self, other)\n",
" | \n",
" | __ifloordiv__ = f(self, other)\n",
" | \n",
" | __imod__ = f(self, other)\n",
" | \n",
" | __imul__ = f(self, other)\n",
" | \n",
" | __init__(self, data=None, index=None, columns=None, dtype=None, copy=False)\n",
" | Initialize self. See help(type(self)) for accurate signature.\n",
" | \n",
" | __ior__ = f(self, other)\n",
" | \n",
" | __ipow__ = f(self, other)\n",
" | \n",
" | __isub__ = f(self, other)\n",
" | \n",
" | __itruediv__ = f(self, other)\n",
" | \n",
" | __ixor__ = f(self, other)\n",
" | \n",
" | __le__(self, other)\n",
" | Wrapper for comparison method __le__\n",
" | \n",
" | __len__(self)\n",
" | Returns length of info axis, but here we use the index\n",
" | \n",
" | __lt__(self, other)\n",
" | Wrapper for comparison method __lt__\n",
" | \n",
" | __mod__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __mod__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __mul__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __mul__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __ne__(self, other)\n",
" | Wrapper for comparison method __ne__\n",
" | \n",
" | __or__(self, other, axis='columns', level=None, fill_value=None)\n",
" | Binary operator __or__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __pow__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __pow__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __radd__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __radd__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __rand__(self, other, axis='columns', level=None, fill_value=None)\n",
" | Binary operator __rand__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __rdiv__ = __rtruediv__(self, other, axis=None, level=None, fill_value=None)\n",
" | \n",
" | __rfloordiv__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __rfloordiv__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __rmod__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __rmod__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __rmul__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __rmul__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __ror__(self, other, axis='columns', level=None, fill_value=None)\n",
" | Binary operator __ror__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __rpow__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __rpow__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __rsub__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __rsub__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __rtruediv__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __rtruediv__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __rxor__(self, other, axis='columns', level=None, fill_value=None)\n",
" | Binary operator __rxor__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __setitem__(self, key, value)\n",
" | \n",
" | __sub__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __sub__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __truediv__(self, other, axis=None, level=None, fill_value=None)\n",
" | Binary operator __truediv__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | __unicode__(self)\n",
" | Return a string representation for a particular DataFrame\n",
" | \n",
" | Invoked by unicode(df) in py2 only. Yields a Unicode String in both\n",
" | py2/py3.\n",
" | \n",
" | __xor__(self, other, axis='columns', level=None, fill_value=None)\n",
" | Binary operator __xor__ with support to substitute a fill_value for missing data in\n",
" | one of the inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame locations are\n",
" | missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | add(self, other, axis='columns', level=None, fill_value=None)\n",
" | Addition of dataframe and other, element-wise (binary operator `add`).\n",
" | \n",
" | Equivalent to ``dataframe + other``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.radd\n",
" | \n",
" | agg = aggregate(self, func, axis=0, *args, **kwargs)\n",
" | \n",
" | aggregate(self, func, axis=0, *args, **kwargs)\n",
" | Aggregate using callable, string, dict, or list of string/callables\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | func : callable, string, dictionary, or list of string/callables\n",
" | Function to use for aggregating the data. If a function, must either\n",
" | work when passed a DataFrame or when passed to DataFrame.apply. For\n",
" | a DataFrame, can pass a dict, if the keys are DataFrame column names.\n",
" | \n",
" | Accepted Combinations are:\n",
" | \n",
" | - string function name\n",
" | - function\n",
" | - list of functions\n",
" | - dict of column names -> functions (or list of functions)\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Numpy functions mean/median/prod/sum/std/var are special cased so the\n",
" | default behavior is applying the function along axis=0\n",
" | (e.g., np.mean(arr_2d, axis=0)) as opposed to\n",
" | mimicking the default Numpy behavior (e.g., np.mean(arr_2d)).\n",
" | \n",
" | `agg` is an alias for `aggregate`. Use the alias.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | aggregated : DataFrame\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],\n",
" | ... index=pd.date_range('1/1/2000', periods=10))\n",
" | >>> df.iloc[3:7] = np.nan\n",
" | \n",
" | Aggregate these functions across all columns\n",
" | \n",
" | >>> df.agg(['sum', 'min'])\n",
" | A B C\n",
" | sum -0.182253 -0.614014 -2.909534\n",
" | min -1.916563 -1.460076 -1.568297\n",
" | \n",
" | Different aggregations per column\n",
" | \n",
" | >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})\n",
" | A B\n",
" | max NaN 1.514318\n",
" | min -1.916563 -1.460076\n",
" | sum -0.182253 NaN\n",
" | \n",
" | See also\n",
" | --------\n",
" | pandas.DataFrame.apply\n",
" | pandas.DataFrame.transform\n",
" | pandas.DataFrame.groupby.aggregate\n",
" | pandas.DataFrame.resample.aggregate\n",
" | pandas.DataFrame.rolling.aggregate\n",
" | \n",
" | align(self, other, join='outer', axis=None, level=None, copy=True, fill_value=None, method=None, limit=None, fill_axis=0, broadcast_axis=None)\n",
" | Align two objects on their axes with the\n",
" | specified join method for each axis Index\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : DataFrame or Series\n",
" | join : {'outer', 'inner', 'left', 'right'}, default 'outer'\n",
" | axis : allowed axis of the other object, default None\n",
" | Align on index (0), columns (1), or both (None)\n",
" | level : int or level name, default None\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | copy : boolean, default True\n",
" | Always returns new objects. If copy=False and no reindexing is\n",
" | required then original objects are returned.\n",
" | fill_value : scalar, default np.NaN\n",
" | Value to use for missing values. Defaults to NaN, but can be any\n",
" | \"compatible\" value\n",
" | method : str, default None\n",
" | limit : int, default None\n",
" | fill_axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | Filling axis, method and limit\n",
" | broadcast_axis : {0 or 'index', 1 or 'columns'}, default None\n",
" | Broadcast values along this axis, if aligning two objects of\n",
" | different dimensions\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | (left, right) : (DataFrame, type of other)\n",
" | Aligned objects\n",
" | \n",
" | all(self, axis=None, bool_only=None, skipna=None, level=None, **kwargs)\n",
" | Return whether all elements are True over requested axis\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | bool_only : boolean, default None\n",
" | Include only boolean columns. If None, will attempt to use everything,\n",
" | then use only boolean data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | all : Series or DataFrame (if level specified)\n",
" | \n",
" | any(self, axis=None, bool_only=None, skipna=None, level=None, **kwargs)\n",
" | Return whether any element is True over requested axis\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | bool_only : boolean, default None\n",
" | Include only boolean columns. If None, will attempt to use everything,\n",
" | then use only boolean data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | any : Series or DataFrame (if level specified)\n",
" | \n",
" | append(self, other, ignore_index=False, verify_integrity=False)\n",
" | Append rows of `other` to the end of this frame, returning a new\n",
" | object. Columns not in this frame are added as new columns.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : DataFrame or Series/dict-like object, or list of these\n",
" | The data to append.\n",
" | ignore_index : boolean, default False\n",
" | If True, do not use the index labels.\n",
" | verify_integrity : boolean, default False\n",
" | If True, raise ValueError on creating index with duplicates.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | appended : DataFrame\n",
" | \n",
" | Notes\n",
" | -----\n",
" | If a list of dict/series is passed and the keys are all contained in\n",
" | the DataFrame's index, the order of the columns in the resulting\n",
" | DataFrame will be unchanged.\n",
" | \n",
" | Iteratively appending rows to a DataFrame can be more computationally\n",
" | intensive than a single concatenate. A better solution is to append\n",
" | those rows to a list and then concatenate the list with the original\n",
" | DataFrame all at once.\n",
" | \n",
" | See also\n",
" | --------\n",
" | pandas.concat : General function to concatenate DataFrame, Series\n",
" | or Panel objects\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'))\n",
" | >>> df\n",
" | A B\n",
" | 0 1 2\n",
" | 1 3 4\n",
" | >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'))\n",
" | >>> df.append(df2)\n",
" | A B\n",
" | 0 1 2\n",
" | 1 3 4\n",
" | 0 5 6\n",
" | 1 7 8\n",
" | \n",
" | With `ignore_index` set to True:\n",
" | \n",
" | >>> df.append(df2, ignore_index=True)\n",
" | A B\n",
" | 0 1 2\n",
" | 1 3 4\n",
" | 2 5 6\n",
" | 3 7 8\n",
" | \n",
" | The following, while not recommended methods for generating DataFrames,\n",
" | show two ways to generate a DataFrame from multiple data sources.\n",
" | \n",
" | Less efficient:\n",
" | \n",
" | >>> df = pd.DataFrame(columns=['A'])\n",
" | >>> for i in range(5):\n",
" | ... df = df.append({'A': i}, ignore_index=True)\n",
" | >>> df\n",
" | A\n",
" | 0 0\n",
" | 1 1\n",
" | 2 2\n",
" | 3 3\n",
" | 4 4\n",
" | \n",
" | More efficient:\n",
" | \n",
" | >>> pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)],\n",
" | ... ignore_index=True)\n",
" | A\n",
" | 0 0\n",
" | 1 1\n",
" | 2 2\n",
" | 3 3\n",
" | 4 4\n",
" | \n",
" | apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds)\n",
" | Applies function along input axis of DataFrame.\n",
" | \n",
" | Objects passed to functions are Series objects having index\n",
" | either the DataFrame's index (axis=0) or the columns (axis=1).\n",
" | Return type depends on whether passed function aggregates, or the\n",
" | reduce argument if the DataFrame is empty.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | func : function\n",
" | Function to apply to each column/row\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | * 0 or 'index': apply function to each column\n",
" | * 1 or 'columns': apply function to each row\n",
" | broadcast : boolean, default False\n",
" | For aggregation functions, return object of same size with values\n",
" | propagated\n",
" | raw : boolean, default False\n",
" | If False, convert each row or column into a Series. If raw=True the\n",
" | passed function will receive ndarray objects instead. If you are\n",
" | just applying a NumPy reduction function this will achieve much\n",
" | better performance\n",
" | reduce : boolean or None, default None\n",
" | Try to apply reduction procedures. If the DataFrame is empty,\n",
" | apply will use reduce to determine whether the result should be a\n",
" | Series or a DataFrame. If reduce is None (the default), apply's\n",
" | return value will be guessed by calling func an empty Series (note:\n",
" | while guessing, exceptions raised by func will be ignored). If\n",
" | reduce is True a Series will always be returned, and if False a\n",
" | DataFrame will always be returned.\n",
" | args : tuple\n",
" | Positional arguments to pass to function in addition to the\n",
" | array/series\n",
" | Additional keyword arguments will be passed as keywords to the function\n",
" | \n",
" | Notes\n",
" | -----\n",
" | In the current implementation apply calls func twice on the\n",
" | first column/row to decide whether it can take a fast or slow\n",
" | code path. This can lead to unexpected behavior if func has\n",
" | side-effects, as they will take effect twice for the first\n",
" | column/row.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df.apply(numpy.sqrt) # returns DataFrame\n",
" | >>> df.apply(numpy.sum, axis=0) # equiv to df.sum(0)\n",
" | >>> df.apply(numpy.sum, axis=1) # equiv to df.sum(1)\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.applymap: For elementwise operations\n",
" | DataFrame.aggregate: only perform aggregating type operations\n",
" | DataFrame.transform: only perform transformating type operations\n",
" | \n",
" | Returns\n",
" | -------\n",
" | applied : Series or DataFrame\n",
" | \n",
" | applymap(self, func)\n",
" | Apply a function to a DataFrame that is intended to operate\n",
" | elementwise, i.e. like doing map(func, series) for each series in the\n",
" | DataFrame\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | func : function\n",
" | Python function, returns a single value from a single value\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = pd.DataFrame(np.random.randn(3, 3))\n",
" | >>> df\n",
" | 0 1 2\n",
" | 0 -0.029638 1.081563 1.280300\n",
" | 1 0.647747 0.831136 -1.549481\n",
" | 2 0.513416 -0.884417 0.195343\n",
" | >>> df = df.applymap(lambda x: '%.2f' % x)\n",
" | >>> df\n",
" | 0 1 2\n",
" | 0 -0.03 1.08 1.28\n",
" | 1 0.65 0.83 -1.55\n",
" | 2 0.51 -0.88 0.20\n",
" | \n",
" | Returns\n",
" | -------\n",
" | applied : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.apply : For operations on rows/columns\n",
" | \n",
" | assign(self, **kwargs)\n",
" | Assign new columns to a DataFrame, returning a new object\n",
" | (a copy) with all the original columns in addition to the new ones.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | kwargs : keyword, value pairs\n",
" | keywords are the column names. If the values are\n",
" | callable, they are computed on the DataFrame and\n",
" | assigned to the new columns. The callable must not\n",
" | change input DataFrame (though pandas doesn't check it).\n",
" | If the values are not callable, (e.g. a Series, scalar, or array),\n",
" | they are simply assigned.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | df : DataFrame\n",
" | A new DataFrame with the new columns in addition to\n",
" | all the existing columns.\n",
" | \n",
" | Notes\n",
" | -----\n",
" | For python 3.6 and above, the columns are inserted in the order of\n",
" | \\*\\*kwargs. For python 3.5 and earlier, since \\*\\*kwargs is unordered,\n",
" | the columns are inserted in alphabetical order at the end of your\n",
" | DataFrame. Assigning multiple columns within the same ``assign``\n",
" | is possible, but you cannot reference other columns created within\n",
" | the same ``assign`` call.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = DataFrame({'A': range(1, 11), 'B': np.random.randn(10)})\n",
" | \n",
" | Where the value is a callable, evaluated on `df`:\n",
" | \n",
" | >>> df.assign(ln_A = lambda x: np.log(x.A))\n",
" | A B ln_A\n",
" | 0 1 0.426905 0.000000\n",
" | 1 2 -0.780949 0.693147\n",
" | 2 3 -0.418711 1.098612\n",
" | 3 4 -0.269708 1.386294\n",
" | 4 5 -0.274002 1.609438\n",
" | 5 6 -0.500792 1.791759\n",
" | 6 7 1.649697 1.945910\n",
" | 7 8 -1.495604 2.079442\n",
" | 8 9 0.549296 2.197225\n",
" | 9 10 -0.758542 2.302585\n",
" | \n",
" | Where the value already exists and is inserted:\n",
" | \n",
" | >>> newcol = np.log(df['A'])\n",
" | >>> df.assign(ln_A=newcol)\n",
" | A B ln_A\n",
" | 0 1 0.426905 0.000000\n",
" | 1 2 -0.780949 0.693147\n",
" | 2 3 -0.418711 1.098612\n",
" | 3 4 -0.269708 1.386294\n",
" | 4 5 -0.274002 1.609438\n",
" | 5 6 -0.500792 1.791759\n",
" | 6 7 1.649697 1.945910\n",
" | 7 8 -1.495604 2.079442\n",
" | 8 9 0.549296 2.197225\n",
" | 9 10 -0.758542 2.302585\n",
" | \n",
" | boxplot = boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds)\n",
" | Make a box plot from DataFrame column optionally grouped by some columns or\n",
" | other inputs\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | data : the pandas object holding the data\n",
" | column : column name or list of names, or vector\n",
" | Can be any valid input to groupby\n",
" | by : string or sequence\n",
" | Column in the DataFrame to group by\n",
" | ax : Matplotlib axes object, optional\n",
" | fontsize : int or string\n",
" | rot : label rotation angle\n",
" | figsize : A tuple (width, height) in inches\n",
" | grid : Setting this to True will show the grid\n",
" | layout : tuple (optional)\n",
" | (rows, columns) for the layout of the plot\n",
" | return_type : {None, 'axes', 'dict', 'both'}, default None\n",
" | The kind of object to return. The default is ``axes``\n",
" | 'axes' returns the matplotlib axes the boxplot is drawn on;\n",
" | 'dict' returns a dictionary whose values are the matplotlib\n",
" | Lines of the boxplot;\n",
" | 'both' returns a namedtuple with the axes and dict.\n",
" | \n",
" | When grouping with ``by``, a Series mapping columns to ``return_type``\n",
" | is returned, unless ``return_type`` is None, in which case a NumPy\n",
" | array of axes is returned with the same shape as ``layout``.\n",
" | See the prose documentation for more.\n",
" | \n",
" | kwds : other plotting keyword arguments to be passed to matplotlib boxplot\n",
" | function\n",
" | \n",
" | Returns\n",
" | -------\n",
" | lines : dict\n",
" | ax : matplotlib Axes\n",
" | (ax, lines): namedtuple\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Use ``return_type='dict'`` when you want to tweak the appearance\n",
" | of the lines after plotting. In this case a dict containing the Lines\n",
" | making up the boxes, caps, fliers, medians, and whiskers is returned.\n",
" | \n",
" | combine(self, other, func, fill_value=None, overwrite=True)\n",
" | Add two DataFrame objects and do not propagate NaN values, so if for a\n",
" | (column, time) one frame is missing a value, it will default to the\n",
" | other frame's value (which might be NaN as well)\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : DataFrame\n",
" | func : function\n",
" | Function that takes two series as inputs and return a Series or a\n",
" | scalar\n",
" | fill_value : scalar value\n",
" | overwrite : boolean, default True\n",
" | If True then overwrite values for common keys in the calling frame\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]})\n",
" | >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]})\n",
" | >>> df1.combine(df2, lambda s1, s2: s1 if s1.sum() < s2.sum() else s2)\n",
" | A B\n",
" | 0 0 3\n",
" | 1 0 3\n",
" | \n",
" | See Also\n",
" | --------\n",
" | DataFrame.combine_first : Combine two DataFrame objects and default to\n",
" | non-null values in frame calling the method\n",
" | \n",
" | combine_first(self, other)\n",
" | Combine two DataFrame objects and default to non-null values in frame\n",
" | calling the method. Result index columns will be the union of the\n",
" | respective indexes and columns\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : DataFrame\n",
" | \n",
" | Returns\n",
" | -------\n",
" | combined : DataFrame\n",
" | \n",
" | Examples\n",
" | --------\n",
" | df1's values prioritized, use values from df2 to fill holes:\n",
" | \n",
" | >>> df1 = pd.DataFrame([[1, np.nan]])\n",
" | >>> df2 = pd.DataFrame([[3, 4]])\n",
" | >>> df1.combine_first(df2)\n",
" | 0 1\n",
" | 0 1 4.0\n",
" | \n",
" | See Also\n",
" | --------\n",
" | DataFrame.combine : Perform series-wise operation on two DataFrames\n",
" | using a given function\n",
" | \n",
" | compound(self, axis=None, skipna=None, level=None)\n",
" | Return the compound percentage of the values for the requested axis\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values when computing the result.\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | compounded : Series or DataFrame (if level specified)\n",
" | \n",
" | corr(self, method='pearson', min_periods=1)\n",
" | Compute pairwise correlation of columns, excluding NA/null values\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | method : {'pearson', 'kendall', 'spearman'}\n",
" | * pearson : standard correlation coefficient\n",
" | * kendall : Kendall Tau correlation coefficient\n",
" | * spearman : Spearman rank correlation\n",
" | min_periods : int, optional\n",
" | Minimum number of observations required per pair of columns\n",
" | to have a valid result. Currently only available for pearson\n",
" | and spearman correlation\n",
" | \n",
" | Returns\n",
" | -------\n",
" | y : DataFrame\n",
" | \n",
" | corrwith(self, other, axis=0, drop=False)\n",
" | Compute pairwise correlation between rows or columns of two DataFrame\n",
" | objects.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : DataFrame\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | 0 or 'index' to compute column-wise, 1 or 'columns' for row-wise\n",
" | drop : boolean, default False\n",
" | Drop missing indices from result, default returns union of all\n",
" | \n",
" | Returns\n",
" | -------\n",
" | correls : Series\n",
" | \n",
" | count(self, axis=0, level=None, numeric_only=False)\n",
" | Return Series with number of non-NA/null observations over requested\n",
" | axis. Works with non-floating point data as well (detects NaN and None)\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | 0 or 'index' for row-wise, 1 or 'columns' for column-wise\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a DataFrame\n",
" | numeric_only : boolean, default False\n",
" | Include only float, int, boolean data\n",
" | \n",
" | Returns\n",
" | -------\n",
" | count : Series (or DataFrame if level specified)\n",
" | \n",
" | cov(self, min_periods=None)\n",
" | Compute pairwise covariance of columns, excluding NA/null values\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | min_periods : int, optional\n",
" | Minimum number of observations required per pair of columns\n",
" | to have a valid result.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | y : DataFrame\n",
" | \n",
" | Notes\n",
" | -----\n",
" | `y` contains the covariance matrix of the DataFrame's time series.\n",
" | The covariance is normalized by N-1 (unbiased estimator).\n",
" | \n",
" | cummax(self, axis=None, skipna=True, *args, **kwargs)\n",
" | Return cumulative max over requested axis.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA\n",
" | \n",
" | Returns\n",
" | -------\n",
" | cummax : Series\n",
" | \n",
" | \n",
" | \n",
" | See also\n",
" | --------\n",
" | pandas.core.window.Expanding.max : Similar functionality\n",
" | but ignores ``NaN`` values.\n",
" | \n",
" | cummin(self, axis=None, skipna=True, *args, **kwargs)\n",
" | Return cumulative minimum over requested axis.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA\n",
" | \n",
" | Returns\n",
" | -------\n",
" | cummin : Series\n",
" | \n",
" | \n",
" | \n",
" | See also\n",
" | --------\n",
" | pandas.core.window.Expanding.min : Similar functionality\n",
" | but ignores ``NaN`` values.\n",
" | \n",
" | cumprod(self, axis=None, skipna=True, *args, **kwargs)\n",
" | Return cumulative product over requested axis.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA\n",
" | \n",
" | Returns\n",
" | -------\n",
" | cumprod : Series\n",
" | \n",
" | \n",
" | \n",
" | See also\n",
" | --------\n",
" | pandas.core.window.Expanding.prod : Similar functionality\n",
" | but ignores ``NaN`` values.\n",
" | \n",
" | cumsum(self, axis=None, skipna=True, *args, **kwargs)\n",
" | Return cumulative sum over requested axis.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA\n",
" | \n",
" | Returns\n",
" | -------\n",
" | cumsum : Series\n",
" | \n",
" | \n",
" | \n",
" | See also\n",
" | --------\n",
" | pandas.core.window.Expanding.sum : Similar functionality\n",
" | but ignores ``NaN`` values.\n",
" | \n",
" | diff(self, periods=1, axis=0)\n",
" | 1st discrete difference of object\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | periods : int, default 1\n",
" | Periods to shift for forming difference\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | Take difference over rows (0) or columns (1).\n",
" | \n",
" | .. versionadded: 0.16.1\n",
" | \n",
" | Returns\n",
" | -------\n",
" | diffed : DataFrame\n",
" | \n",
" | div = truediv(self, other, axis='columns', level=None, fill_value=None)\n",
" | \n",
" | divide = truediv(self, other, axis='columns', level=None, fill_value=None)\n",
" | \n",
" | dot(self, other)\n",
" | Matrix multiplication with DataFrame or Series objects\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : DataFrame or Series\n",
" | \n",
" | Returns\n",
" | -------\n",
" | dot_product : DataFrame or Series\n",
" | \n",
" | drop_duplicates(self, subset=None, keep='first', inplace=False)\n",
" | Return DataFrame with duplicate rows removed, optionally only\n",
" | considering certain columns\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | subset : column label or sequence of labels, optional\n",
" | Only consider certain columns for identifying duplicates, by\n",
" | default use all of the columns\n",
" | keep : {'first', 'last', False}, default 'first'\n",
" | - ``first`` : Drop duplicates except for the first occurrence.\n",
" | - ``last`` : Drop duplicates except for the last occurrence.\n",
" | - False : Drop all duplicates.\n",
" | inplace : boolean, default False\n",
" | Whether to drop duplicates in place or to return a copy\n",
" | \n",
" | Returns\n",
" | -------\n",
" | deduplicated : DataFrame\n",
" | \n",
" | dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False)\n",
" | Return object with labels on given axis omitted where alternately any\n",
" | or all of the data are missing\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {0 or 'index', 1 or 'columns'}, or tuple/list thereof\n",
" | Pass tuple or list to drop on multiple axes\n",
" | how : {'any', 'all'}\n",
" | * any : if any NA values are present, drop that label\n",
" | * all : if all values are NA, drop that label\n",
" | thresh : int, default None\n",
" | int value : require that many non-NA values\n",
" | subset : array-like\n",
" | Labels along other axis to consider, e.g. if you are dropping rows\n",
" | these would be a list of columns to include\n",
" | inplace : boolean, default False\n",
" | If True, do operation inplace and return None.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | dropped : DataFrame\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], [3, 4, np.nan, 1],\n",
" | ... [np.nan, np.nan, np.nan, 5]],\n",
" | ... columns=list('ABCD'))\n",
" | >>> df\n",
" | A B C D\n",
" | 0 NaN 2.0 NaN 0\n",
" | 1 3.0 4.0 NaN 1\n",
" | 2 NaN NaN NaN 5\n",
" | \n",
" | Drop the columns where all elements are nan:\n",
" | \n",
" | >>> df.dropna(axis=1, how='all')\n",
" | A B D\n",
" | 0 NaN 2.0 0\n",
" | 1 3.0 4.0 1\n",
" | 2 NaN NaN 5\n",
" | \n",
" | Drop the columns where any of the elements is nan\n",
" | \n",
" | >>> df.dropna(axis=1, how='any')\n",
" | D\n",
" | 0 0\n",
" | 1 1\n",
" | 2 5\n",
" | \n",
" | Drop the rows where all of the elements are nan\n",
" | (there is no row to drop, so df stays the same):\n",
" | \n",
" | >>> df.dropna(axis=0, how='all')\n",
" | A B C D\n",
" | 0 NaN 2.0 NaN 0\n",
" | 1 3.0 4.0 NaN 1\n",
" | 2 NaN NaN NaN 5\n",
" | \n",
" | Keep only the rows with at least 2 non-na values:\n",
" | \n",
" | >>> df.dropna(thresh=2)\n",
" | A B C D\n",
" | 0 NaN 2.0 NaN 0\n",
" | 1 3.0 4.0 NaN 1\n",
" | \n",
" | duplicated(self, subset=None, keep='first')\n",
" | Return boolean Series denoting duplicate rows, optionally only\n",
" | considering certain columns\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | subset : column label or sequence of labels, optional\n",
" | Only consider certain columns for identifying duplicates, by\n",
" | default use all of the columns\n",
" | keep : {'first', 'last', False}, default 'first'\n",
" | - ``first`` : Mark duplicates as ``True`` except for the\n",
" | first occurrence.\n",
" | - ``last`` : Mark duplicates as ``True`` except for the\n",
" | last occurrence.\n",
" | - False : Mark all duplicates as ``True``.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | duplicated : Series\n",
" | \n",
" | eq(self, other, axis='columns', level=None)\n",
" | Wrapper for flexible comparison methods eq\n",
" | \n",
" | eval(self, expr, inplace=False, **kwargs)\n",
" | Evaluate an expression in the context of the calling DataFrame\n",
" | instance.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | expr : string\n",
" | The expression string to evaluate.\n",
" | inplace : bool, default False\n",
" | If the expression contains an assignment, whether to perform the\n",
" | operation inplace and mutate the existing DataFrame. Otherwise,\n",
" | a new DataFrame is returned.\n",
" | \n",
" | .. versionadded:: 0.18.0\n",
" | \n",
" | kwargs : dict\n",
" | See the documentation for :func:`~pandas.eval` for complete details\n",
" | on the keyword arguments accepted by\n",
" | :meth:`~pandas.DataFrame.query`.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | ret : ndarray, scalar, or pandas object\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.DataFrame.query\n",
" | pandas.DataFrame.assign\n",
" | pandas.eval\n",
" | \n",
" | Notes\n",
" | -----\n",
" | For more details see the API documentation for :func:`~pandas.eval`.\n",
" | For detailed examples see :ref:`enhancing performance with eval\n",
" | `.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> from numpy.random import randn\n",
" | >>> from pandas import DataFrame\n",
" | >>> df = DataFrame(randn(10, 2), columns=list('ab'))\n",
" | >>> df.eval('a + b')\n",
" | >>> df.eval('c = a + b')\n",
" | \n",
" | ewm(self, com=None, span=None, halflife=None, alpha=None, min_periods=0, freq=None, adjust=True, ignore_na=False, axis=0)\n",
" | Provides exponential weighted functions\n",
" | \n",
" | .. versionadded:: 0.18.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | com : float, optional\n",
" | Specify decay in terms of center of mass,\n",
" | :math:`\\alpha = 1 / (1 + com),\\text{ for } com \\geq 0`\n",
" | span : float, optional\n",
" | Specify decay in terms of span,\n",
" | :math:`\\alpha = 2 / (span + 1),\\text{ for } span \\geq 1`\n",
" | halflife : float, optional\n",
" | Specify decay in terms of half-life,\n",
" | :math:`\\alpha = 1 - exp(log(0.5) / halflife),\\text{ for } halflife > 0`\n",
" | alpha : float, optional\n",
" | Specify smoothing factor :math:`\\alpha` directly,\n",
" | :math:`0 < \\alpha \\leq 1`\n",
" | \n",
" | .. versionadded:: 0.18.0\n",
" | \n",
" | min_periods : int, default 0\n",
" | Minimum number of observations in window required to have a value\n",
" | (otherwise result is NA).\n",
" | freq : None or string alias / date offset object, default=None\n",
" | .. deprecated:: 0.18.0\n",
" | Frequency to conform to before computing statistic\n",
" | adjust : boolean, default True\n",
" | Divide by decaying adjustment factor in beginning periods to account\n",
" | for imbalance in relative weightings (viewing EWMA as a moving average)\n",
" | ignore_na : boolean, default False\n",
" | Ignore missing values when calculating weights;\n",
" | specify True to reproduce pre-0.15.0 behavior\n",
" | \n",
" | Returns\n",
" | -------\n",
" | a Window sub-classed for the particular operation\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = DataFrame({'B': [0, 1, 2, np.nan, 4]})\n",
" | B\n",
" | 0 0.0\n",
" | 1 1.0\n",
" | 2 2.0\n",
" | 3 NaN\n",
" | 4 4.0\n",
" | \n",
" | >>> df.ewm(com=0.5).mean()\n",
" | B\n",
" | 0 0.000000\n",
" | 1 0.750000\n",
" | 2 1.615385\n",
" | 3 1.615385\n",
" | 4 3.670213\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Exactly one of center of mass, span, half-life, and alpha must be provided.\n",
" | Allowed values and relationship between the parameters are specified in the\n",
" | parameter descriptions above; see the link at the end of this section for\n",
" | a detailed explanation.\n",
" | \n",
" | The `freq` keyword is used to conform time series data to a specified\n",
" | frequency by resampling the data. This is done with the default parameters\n",
" | of :meth:`~pandas.Series.resample` (i.e. using the `mean`).\n",
" | \n",
" | When adjust is True (default), weighted averages are calculated using\n",
" | weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1.\n",
" | \n",
" | When adjust is False, weighted averages are calculated recursively as:\n",
" | weighted_average[0] = arg[0];\n",
" | weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i].\n",
" | \n",
" | When ignore_na is False (default), weights are based on absolute positions.\n",
" | For example, the weights of x and y used in calculating the final weighted\n",
" | average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and\n",
" | (1-alpha)**2 and alpha (if adjust is False).\n",
" | \n",
" | When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based\n",
" | on relative positions. For example, the weights of x and y used in\n",
" | calculating the final weighted average of [x, None, y] are 1-alpha and 1\n",
" | (if adjust is True), and 1-alpha and alpha (if adjust is False).\n",
" | \n",
" | More details can be found at\n",
" | http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows\n",
" | \n",
" | expanding(self, min_periods=1, freq=None, center=False, axis=0)\n",
" | Provides expanding transformations.\n",
" | \n",
" | .. versionadded:: 0.18.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | min_periods : int, default None\n",
" | Minimum number of observations in window required to have a value\n",
" | (otherwise result is NA).\n",
" | freq : string or DateOffset object, optional (default None)\n",
" | .. deprecated:: 0.18.0\n",
" | Frequency to conform the data to before computing the statistic.\n",
" | Specified as a frequency string or DateOffset object.\n",
" | center : boolean, default False\n",
" | Set the labels at the center of the window.\n",
" | axis : int or string, default 0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | a Window sub-classed for the particular operation\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = DataFrame({'B': [0, 1, 2, np.nan, 4]})\n",
" | B\n",
" | 0 0.0\n",
" | 1 1.0\n",
" | 2 2.0\n",
" | 3 NaN\n",
" | 4 4.0\n",
" | \n",
" | >>> df.expanding(2).sum()\n",
" | B\n",
" | 0 NaN\n",
" | 1 1.0\n",
" | 2 3.0\n",
" | 3 3.0\n",
" | 4 7.0\n",
" | \n",
" | Notes\n",
" | -----\n",
" | By default, the result is set to the right edge of the window. This can be\n",
" | changed to the center of the window by setting ``center=True``.\n",
" | \n",
" | The `freq` keyword is used to conform time series data to a specified\n",
" | frequency by resampling the data. This is done with the default parameters\n",
" | of :meth:`~pandas.Series.resample` (i.e. using the `mean`).\n",
" | \n",
" | fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None, **kwargs)\n",
" | Fill NA/NaN values using the specified method\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | value : scalar, dict, Series, or DataFrame\n",
" | Value to use to fill holes (e.g. 0), alternately a\n",
" | dict/Series/DataFrame of values specifying which value to use for\n",
" | each index (for a Series) or column (for a DataFrame). (values not\n",
" | in the dict/Series/DataFrame will not be filled). This value cannot\n",
" | be a list.\n",
" | method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None\n",
" | Method to use for filling holes in reindexed Series\n",
" | pad / ffill: propagate last valid observation forward to next valid\n",
" | backfill / bfill: use NEXT valid observation to fill gap\n",
" | axis : {0 or 'index', 1 or 'columns'}\n",
" | inplace : boolean, default False\n",
" | If True, fill in place. Note: this will modify any\n",
" | other views on this object, (e.g. a no-copy slice for a column in a\n",
" | DataFrame).\n",
" | limit : int, default None\n",
" | If method is specified, this is the maximum number of consecutive\n",
" | NaN values to forward/backward fill. In other words, if there is\n",
" | a gap with more than this number of consecutive NaNs, it will only\n",
" | be partially filled. If method is not specified, this is the\n",
" | maximum number of entries along the entire axis where NaNs will be\n",
" | filled. Must be greater than 0 if not None.\n",
" | downcast : dict, default is None\n",
" | a dict of item->dtype of what to downcast if possible,\n",
" | or the string 'infer' which will try to downcast to an appropriate\n",
" | equal type (e.g. float64 to int64 if possible)\n",
" | \n",
" | See Also\n",
" | --------\n",
" | reindex, asfreq\n",
" | \n",
" | Returns\n",
" | -------\n",
" | filled : DataFrame\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0],\n",
" | ... [3, 4, np.nan, 1],\n",
" | ... [np.nan, np.nan, np.nan, 5],\n",
" | ... [np.nan, 3, np.nan, 4]],\n",
" | ... columns=list('ABCD'))\n",
" | >>> df\n",
" | A B C D\n",
" | 0 NaN 2.0 NaN 0\n",
" | 1 3.0 4.0 NaN 1\n",
" | 2 NaN NaN NaN 5\n",
" | 3 NaN 3.0 NaN 4\n",
" | \n",
" | Replace all NaN elements with 0s.\n",
" | \n",
" | >>> df.fillna(0)\n",
" | A B C D\n",
" | 0 0.0 2.0 0.0 0\n",
" | 1 3.0 4.0 0.0 1\n",
" | 2 0.0 0.0 0.0 5\n",
" | 3 0.0 3.0 0.0 4\n",
" | \n",
" | We can also propagate non-null values forward or backward.\n",
" | \n",
" | >>> df.fillna(method='ffill')\n",
" | A B C D\n",
" | 0 NaN 2.0 NaN 0\n",
" | 1 3.0 4.0 NaN 1\n",
" | 2 3.0 4.0 NaN 5\n",
" | 3 3.0 3.0 NaN 4\n",
" | \n",
" | Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1,\n",
" | 2, and 3 respectively.\n",
" | \n",
" | >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3}\n",
" | >>> df.fillna(value=values)\n",
" | A B C D\n",
" | 0 0.0 2.0 2.0 0\n",
" | 1 3.0 4.0 2.0 1\n",
" | 2 0.0 1.0 2.0 5\n",
" | 3 0.0 3.0 2.0 4\n",
" | \n",
" | Only replace the first NaN element.\n",
" | \n",
" | >>> df.fillna(value=values, limit=1)\n",
" | A B C D\n",
" | 0 0.0 2.0 2.0 0\n",
" | 1 3.0 4.0 NaN 1\n",
" | 2 NaN 1.0 NaN 5\n",
" | 3 NaN 3.0 NaN 4\n",
" | \n",
" | first_valid_index(self)\n",
" | Return index for first non-NA/null value.\n",
" | \n",
" | Notes\n",
" | --------\n",
" | If all elements are non-NA/null, returns None.\n",
" | Also returns None for empty DataFrame.\n",
" | \n",
" | Returns\n",
" | --------\n",
" | scalar : type of index\n",
" | \n",
" | floordiv(self, other, axis='columns', level=None, fill_value=None)\n",
" | Integer division of dataframe and other, element-wise (binary operator `floordiv`).\n",
" | \n",
" | Equivalent to ``dataframe // other``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.rfloordiv\n",
" | \n",
" | ge(self, other, axis='columns', level=None)\n",
" | Wrapper for flexible comparison methods ge\n",
" | \n",
" | get_value(self, index, col, takeable=False)\n",
" | Quickly retrieve single value at passed column and index\n",
" | \n",
" | .. deprecated:: 0.21.0\n",
" | \n",
" | Please use .at[] or .iat[] accessors.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | index : row label\n",
" | col : column label\n",
" | takeable : interpret the index/col as indexers, default False\n",
" | \n",
" | Returns\n",
" | -------\n",
" | value : scalar value\n",
" | \n",
" | gt(self, other, axis='columns', level=None)\n",
" | Wrapper for flexible comparison methods gt\n",
" | \n",
" | hist = hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, **kwds)\n",
" | Draw histogram of the DataFrame's series using matplotlib / pylab.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | data : DataFrame\n",
" | column : string or sequence\n",
" | If passed, will be used to limit data to a subset of columns\n",
" | by : object, optional\n",
" | If passed, then used to form histograms for separate groups\n",
" | grid : boolean, default True\n",
" | Whether to show axis grid lines\n",
" | xlabelsize : int, default None\n",
" | If specified changes the x-axis label size\n",
" | xrot : float, default None\n",
" | rotation of x axis labels\n",
" | ylabelsize : int, default None\n",
" | If specified changes the y-axis label size\n",
" | yrot : float, default None\n",
" | rotation of y axis labels\n",
" | ax : matplotlib axes object, default None\n",
" | sharex : boolean, default True if ax is None else False\n",
" | In case subplots=True, share x axis and set some x axis labels to\n",
" | invisible; defaults to True if ax is None otherwise False if an ax\n",
" | is passed in; Be aware, that passing in both an ax and sharex=True\n",
" | will alter all x axis labels for all subplots in a figure!\n",
" | sharey : boolean, default False\n",
" | In case subplots=True, share y axis and set some y axis labels to\n",
" | invisible\n",
" | figsize : tuple\n",
" | The size of the figure to create in inches by default\n",
" | layout : tuple, optional\n",
" | Tuple of (rows, columns) for the layout of the histograms\n",
" | bins : integer, default 10\n",
" | Number of histogram bins to be used\n",
" | kwds : other plotting keyword arguments\n",
" | To be passed to hist function\n",
" | \n",
" | idxmax(self, axis=0, skipna=True)\n",
" | Return index of first occurrence of maximum over requested axis.\n",
" | NA/null values are excluded.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | 0 or 'index' for row-wise, 1 or 'columns' for column-wise\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA.\n",
" | \n",
" | Raises\n",
" | ------\n",
" | ValueError\n",
" | * If the row/column is empty\n",
" | \n",
" | Returns\n",
" | -------\n",
" | idxmax : Series\n",
" | \n",
" | Notes\n",
" | -----\n",
" | This method is the DataFrame version of ``ndarray.argmax``.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | Series.idxmax\n",
" | \n",
" | idxmin(self, axis=0, skipna=True)\n",
" | Return index of first occurrence of minimum over requested axis.\n",
" | NA/null values are excluded.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | 0 or 'index' for row-wise, 1 or 'columns' for column-wise\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA.\n",
" | \n",
" | Raises\n",
" | ------\n",
" | ValueError\n",
" | * If the row/column is empty\n",
" | \n",
" | Returns\n",
" | -------\n",
" | idxmin : Series\n",
" | \n",
" | Notes\n",
" | -----\n",
" | This method is the DataFrame version of ``ndarray.argmin``.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | Series.idxmin\n",
" | \n",
" | info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None)\n",
" | Concise summary of a DataFrame.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | verbose : {None, True, False}, optional\n",
" | Whether to print the full summary.\n",
" | None follows the `display.max_info_columns` setting.\n",
" | True or False overrides the `display.max_info_columns` setting.\n",
" | buf : writable buffer, defaults to sys.stdout\n",
" | max_cols : int, default None\n",
" | Determines whether full summary or short summary is printed.\n",
" | None follows the `display.max_info_columns` setting.\n",
" | memory_usage : boolean/string, default None\n",
" | Specifies whether total memory usage of the DataFrame\n",
" | elements (including index) should be displayed. None follows\n",
" | the `display.memory_usage` setting. True or False overrides\n",
" | the `display.memory_usage` setting. A value of 'deep' is equivalent\n",
" | of True, with deep introspection. Memory usage is shown in\n",
" | human-readable units (base-2 representation).\n",
" | null_counts : boolean, default None\n",
" | Whether to show the non-null counts\n",
" | \n",
" | - If None, then only show if the frame is smaller than\n",
" | max_info_rows and max_info_columns.\n",
" | - If True, always show counts.\n",
" | - If False, never show counts.\n",
" | \n",
" | insert(self, loc, column, value, allow_duplicates=False)\n",
" | Insert column into DataFrame at specified location.\n",
" | \n",
" | Raises a ValueError if `column` is already contained in the DataFrame,\n",
" | unless `allow_duplicates` is set to True.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | loc : int\n",
" | Insertion index. Must verify 0 <= loc <= len(columns)\n",
" | column : string, number, or hashable object\n",
" | label of the inserted column\n",
" | value : int, Series, or array-like\n",
" | allow_duplicates : bool, optional\n",
" | \n",
" | isin(self, values)\n",
" | Return boolean DataFrame showing whether each element in the\n",
" | DataFrame is contained in values.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | values : iterable, Series, DataFrame or dictionary\n",
" | The result will only be true at a location if all the\n",
" | labels match. If `values` is a Series, that's the index. If\n",
" | `values` is a dictionary, the keys must be the column names,\n",
" | which must match. If `values` is a DataFrame,\n",
" | then both the index and column labels must match.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | \n",
" | DataFrame of booleans\n",
" | \n",
" | Examples\n",
" | --------\n",
" | When ``values`` is a list:\n",
" | \n",
" | >>> df = DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})\n",
" | >>> df.isin([1, 3, 12, 'a'])\n",
" | A B\n",
" | 0 True True\n",
" | 1 False False\n",
" | 2 True False\n",
" | \n",
" | When ``values`` is a dict:\n",
" | \n",
" | >>> df = DataFrame({'A': [1, 2, 3], 'B': [1, 4, 7]})\n",
" | >>> df.isin({'A': [1, 3], 'B': [4, 7, 12]})\n",
" | A B\n",
" | 0 True False # Note that B didn't match the 1 here.\n",
" | 1 False True\n",
" | 2 True True\n",
" | \n",
" | When ``values`` is a Series or DataFrame:\n",
" | \n",
" | >>> df = DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})\n",
" | >>> other = DataFrame({'A': [1, 3, 3, 2], 'B': ['e', 'f', 'f', 'e']})\n",
" | >>> df.isin(other)\n",
" | A B\n",
" | 0 True False\n",
" | 1 False False # Column A in `other` has a 3, but not at index 1.\n",
" | 2 True True\n",
" | \n",
" | isna(self)\n",
" | Return a boolean same-sized object indicating if the values are NA.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | DataFrame.notna : boolean inverse of isna\n",
" | DataFrame.isnull : alias of isna\n",
" | isna : top-level isna\n",
" | \n",
" | isnull(self)\n",
" | Return a boolean same-sized object indicating if the values are NA.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | DataFrame.notna : boolean inverse of isna\n",
" | DataFrame.isnull : alias of isna\n",
" | isna : top-level isna\n",
" | \n",
" | items = iteritems(self)\n",
" | \n",
" | iteritems(self)\n",
" | Iterator over (column name, Series) pairs.\n",
" | \n",
" | See also\n",
" | --------\n",
" | iterrows : Iterate over DataFrame rows as (index, Series) pairs.\n",
" | itertuples : Iterate over DataFrame rows as namedtuples of the values.\n",
" | \n",
" | iterrows(self)\n",
" | Iterate over DataFrame rows as (index, Series) pairs.\n",
" | \n",
" | Notes\n",
" | -----\n",
" | \n",
" | 1. Because ``iterrows`` returns a Series for each row,\n",
" | it does **not** preserve dtypes across the rows (dtypes are\n",
" | preserved across columns for DataFrames). For example,\n",
" | \n",
" | >>> df = pd.DataFrame([[1, 1.5]], columns=['int', 'float'])\n",
" | >>> row = next(df.iterrows())[1]\n",
" | >>> row\n",
" | int 1.0\n",
" | float 1.5\n",
" | Name: 0, dtype: float64\n",
" | >>> print(row['int'].dtype)\n",
" | float64\n",
" | >>> print(df['int'].dtype)\n",
" | int64\n",
" | \n",
" | To preserve dtypes while iterating over the rows, it is better\n",
" | to use :meth:`itertuples` which returns namedtuples of the values\n",
" | and which is generally faster than ``iterrows``.\n",
" | \n",
" | 2. You should **never modify** something you are iterating over.\n",
" | This is not guaranteed to work in all cases. Depending on the\n",
" | data types, the iterator returns a copy and not a view, and writing\n",
" | to it will have no effect.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | it : generator\n",
" | A generator that iterates over the rows of the frame.\n",
" | \n",
" | See also\n",
" | --------\n",
" | itertuples : Iterate over DataFrame rows as namedtuples of the values.\n",
" | iteritems : Iterate over (column name, Series) pairs.\n",
" | \n",
" | itertuples(self, index=True, name='Pandas')\n",
" | Iterate over DataFrame rows as namedtuples, with index value as first\n",
" | element of the tuple.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | index : boolean, default True\n",
" | If True, return the index as the first element of the tuple.\n",
" | name : string, default \"Pandas\"\n",
" | The name of the returned namedtuples or None to return regular\n",
" | tuples.\n",
" | \n",
" | Notes\n",
" | -----\n",
" | The column names will be renamed to positional names if they are\n",
" | invalid Python identifiers, repeated, or start with an underscore.\n",
" | With a large number of columns (>255), regular tuples are returned.\n",
" | \n",
" | See also\n",
" | --------\n",
" | iterrows : Iterate over DataFrame rows as (index, Series) pairs.\n",
" | iteritems : Iterate over (column name, Series) pairs.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [0.1, 0.2]},\n",
" | index=['a', 'b'])\n",
" | >>> df\n",
" | col1 col2\n",
" | a 1 0.1\n",
" | b 2 0.2\n",
" | >>> for row in df.itertuples():\n",
" | ... print(row)\n",
" | ...\n",
" | Pandas(Index='a', col1=1, col2=0.10000000000000001)\n",
" | Pandas(Index='b', col1=2, col2=0.20000000000000001)\n",
" | \n",
" | join(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False)\n",
" | Join columns with other DataFrame either on index or on a key\n",
" | column. Efficiently Join multiple DataFrame objects by index at once by\n",
" | passing a list.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : DataFrame, Series with name field set, or list of DataFrame\n",
" | Index should be similar to one of the columns in this one. If a\n",
" | Series is passed, its name attribute must be set, and that will be\n",
" | used as the column name in the resulting joined DataFrame\n",
" | on : column name, tuple/list of column names, or array-like\n",
" | Column(s) in the caller to join on the index in other,\n",
" | otherwise joins index-on-index. If multiples\n",
" | columns given, the passed DataFrame must have a MultiIndex. Can\n",
" | pass an array as the join key if not already contained in the\n",
" | calling DataFrame. Like an Excel VLOOKUP operation\n",
" | how : {'left', 'right', 'outer', 'inner'}, default: 'left'\n",
" | How to handle the operation of the two objects.\n",
" | \n",
" | * left: use calling frame's index (or column if on is specified)\n",
" | * right: use other frame's index\n",
" | * outer: form union of calling frame's index (or column if on is\n",
" | specified) with other frame's index, and sort it\n",
" | lexicographically\n",
" | * inner: form intersection of calling frame's index (or column if\n",
" | on is specified) with other frame's index, preserving the order\n",
" | of the calling's one\n",
" | lsuffix : string\n",
" | Suffix to use from left frame's overlapping columns\n",
" | rsuffix : string\n",
" | Suffix to use from right frame's overlapping columns\n",
" | sort : boolean, default False\n",
" | Order result DataFrame lexicographically by the join key. If False,\n",
" | the order of the join key depends on the join type (how keyword)\n",
" | \n",
" | Notes\n",
" | -----\n",
" | on, lsuffix, and rsuffix options are not supported when passing a list\n",
" | of DataFrame objects\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> caller = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],\n",
" | ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']})\n",
" | \n",
" | >>> caller\n",
" | A key\n",
" | 0 A0 K0\n",
" | 1 A1 K1\n",
" | 2 A2 K2\n",
" | 3 A3 K3\n",
" | 4 A4 K4\n",
" | 5 A5 K5\n",
" | \n",
" | >>> other = pd.DataFrame({'key': ['K0', 'K1', 'K2'],\n",
" | ... 'B': ['B0', 'B1', 'B2']})\n",
" | \n",
" | >>> other\n",
" | B key\n",
" | 0 B0 K0\n",
" | 1 B1 K1\n",
" | 2 B2 K2\n",
" | \n",
" | Join DataFrames using their indexes.\n",
" | \n",
" | >>> caller.join(other, lsuffix='_caller', rsuffix='_other')\n",
" | \n",
" | >>> A key_caller B key_other\n",
" | 0 A0 K0 B0 K0\n",
" | 1 A1 K1 B1 K1\n",
" | 2 A2 K2 B2 K2\n",
" | 3 A3 K3 NaN NaN\n",
" | 4 A4 K4 NaN NaN\n",
" | 5 A5 K5 NaN NaN\n",
" | \n",
" | \n",
" | If we want to join using the key columns, we need to set key to be\n",
" | the index in both caller and other. The joined DataFrame will have\n",
" | key as its index.\n",
" | \n",
" | >>> caller.set_index('key').join(other.set_index('key'))\n",
" | \n",
" | >>> A B\n",
" | key\n",
" | K0 A0 B0\n",
" | K1 A1 B1\n",
" | K2 A2 B2\n",
" | K3 A3 NaN\n",
" | K4 A4 NaN\n",
" | K5 A5 NaN\n",
" | \n",
" | Another option to join using the key columns is to use the on\n",
" | parameter. DataFrame.join always uses other's index but we can use any\n",
" | column in the caller. This method preserves the original caller's\n",
" | index in the result.\n",
" | \n",
" | >>> caller.join(other.set_index('key'), on='key')\n",
" | \n",
" | >>> A key B\n",
" | 0 A0 K0 B0\n",
" | 1 A1 K1 B1\n",
" | 2 A2 K2 B2\n",
" | 3 A3 K3 NaN\n",
" | 4 A4 K4 NaN\n",
" | 5 A5 K5 NaN\n",
" | \n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.merge : For column(s)-on-columns(s) operations\n",
" | \n",
" | Returns\n",
" | -------\n",
" | joined : DataFrame\n",
" | \n",
" | kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)\n",
" | Return unbiased kurtosis over requested axis using Fisher's definition of\n",
" | kurtosis (kurtosis of normal == 0.0). Normalized by N-1\n",
" | \n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values when computing the result.\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | kurt : Series or DataFrame (if level specified)\n",
" | \n",
" | kurtosis = kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)\n",
" | \n",
" | last_valid_index(self)\n",
" | Return index for last non-NA/null value.\n",
" | \n",
" | Notes\n",
" | --------\n",
" | If all elements are non-NA/null, returns None.\n",
" | Also returns None for empty DataFrame.\n",
" | \n",
" | Returns\n",
" | --------\n",
" | scalar : type of index\n",
" | \n",
" | le(self, other, axis='columns', level=None)\n",
" | Wrapper for flexible comparison methods le\n",
" | \n",
" | lookup(self, row_labels, col_labels)\n",
" | Label-based \"fancy indexing\" function for DataFrame.\n",
" | Given equal-length arrays of row and column labels, return an\n",
" | array of the values corresponding to each (row, col) pair.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | row_labels : sequence\n",
" | The row labels to use for lookup\n",
" | col_labels : sequence\n",
" | The column labels to use for lookup\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Akin to::\n",
" | \n",
" | result = []\n",
" | for row, col in zip(row_labels, col_labels):\n",
" | result.append(df.get_value(row, col))\n",
" | \n",
" | Examples\n",
" | --------\n",
" | values : ndarray\n",
" | The found values\n",
" | \n",
" | lt(self, other, axis='columns', level=None)\n",
" | Wrapper for flexible comparison methods lt\n",
" | \n",
" | mad(self, axis=None, skipna=None, level=None)\n",
" | Return the mean absolute deviation of the values for the requested axis\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values when computing the result.\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | mad : Series or DataFrame (if level specified)\n",
" | \n",
" | max(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)\n",
" | This method returns the maximum of the values in the object.\n",
" | If you want the *index* of the maximum, use ``idxmax``. This is\n",
" | the equivalent of the ``numpy.ndarray`` method ``argmax``.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values when computing the result.\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | max : Series or DataFrame (if level specified)\n",
" | \n",
" | mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)\n",
" | Return the mean of the values for the requested axis\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values when computing the result.\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | mean : Series or DataFrame (if level specified)\n",
" | \n",
" | median(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)\n",
" | Return the median of the values for the requested axis\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values when computing the result.\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | median : Series or DataFrame (if level specified)\n",
" | \n",
" | melt(self, id_vars=None, value_vars=None, var_name=None, value_name='value', col_level=None)\n",
" | \"Unpivots\" a DataFrame from wide format to long format, optionally\n",
" | leaving identifier variables set.\n",
" | \n",
" | This function is useful to massage a DataFrame into a format where one\n",
" | or more columns are identifier variables (`id_vars`), while all other\n",
" | columns, considered measured variables (`value_vars`), are \"unpivoted\" to\n",
" | the row axis, leaving just two non-identifier columns, 'variable' and\n",
" | 'value'.\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | frame : DataFrame\n",
" | id_vars : tuple, list, or ndarray, optional\n",
" | Column(s) to use as identifier variables.\n",
" | value_vars : tuple, list, or ndarray, optional\n",
" | Column(s) to unpivot. If not specified, uses all columns that\n",
" | are not set as `id_vars`.\n",
" | var_name : scalar\n",
" | Name to use for the 'variable' column. If None it uses\n",
" | ``frame.columns.name`` or 'variable'.\n",
" | value_name : scalar, default 'value'\n",
" | Name to use for the 'value' column.\n",
" | col_level : int or string, optional\n",
" | If columns are a MultiIndex then use this level to melt.\n",
" | \n",
" | See also\n",
" | --------\n",
" | melt\n",
" | pivot_table\n",
" | DataFrame.pivot\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> import pandas as pd\n",
" | >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},\n",
" | ... 'B': {0: 1, 1: 3, 2: 5},\n",
" | ... 'C': {0: 2, 1: 4, 2: 6}})\n",
" | >>> df\n",
" | A B C\n",
" | 0 a 1 2\n",
" | 1 b 3 4\n",
" | 2 c 5 6\n",
" | \n",
" | >>> df.melt(id_vars=['A'], value_vars=['B'])\n",
" | A variable value\n",
" | 0 a B 1\n",
" | 1 b B 3\n",
" | 2 c B 5\n",
" | \n",
" | >>> df.melt(id_vars=['A'], value_vars=['B', 'C'])\n",
" | A variable value\n",
" | 0 a B 1\n",
" | 1 b B 3\n",
" | 2 c B 5\n",
" | 3 a C 2\n",
" | 4 b C 4\n",
" | 5 c C 6\n",
" | \n",
" | The names of 'variable' and 'value' columns can be customized:\n",
" | \n",
" | >>> df.melt(id_vars=['A'], value_vars=['B'],\n",
" | ... var_name='myVarname', value_name='myValname')\n",
" | A myVarname myValname\n",
" | 0 a B 1\n",
" | 1 b B 3\n",
" | 2 c B 5\n",
" | \n",
" | If you have multi-index columns:\n",
" | \n",
" | >>> df.columns = [list('ABC'), list('DEF')]\n",
" | >>> df\n",
" | A B C\n",
" | D E F\n",
" | 0 a 1 2\n",
" | 1 b 3 4\n",
" | 2 c 5 6\n",
" | \n",
" | >>> df.melt(col_level=0, id_vars=['A'], value_vars=['B'])\n",
" | A variable value\n",
" | 0 a B 1\n",
" | 1 b B 3\n",
" | 2 c B 5\n",
" | \n",
" | >>> df.melt(id_vars=[('A', 'D')], value_vars=[('B', 'E')])\n",
" | (A, D) variable_0 variable_1 value\n",
" | 0 a B E 1\n",
" | 1 b B E 3\n",
" | 2 c B E 5\n",
" | \n",
" | memory_usage(self, index=True, deep=False)\n",
" | Memory usage of DataFrame columns.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | index : bool\n",
" | Specifies whether to include memory usage of DataFrame's\n",
" | index in returned Series. If `index=True` (default is False)\n",
" | the first index of the Series is `Index`.\n",
" | deep : bool\n",
" | Introspect the data deeply, interrogate\n",
" | `object` dtypes for system-level memory consumption\n",
" | \n",
" | Returns\n",
" | -------\n",
" | sizes : Series\n",
" | A series with column names as index and memory usage of\n",
" | columns with units of bytes.\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Memory usage does not include memory consumed by elements that\n",
" | are not components of the array if deep=False\n",
" | \n",
" | See Also\n",
" | --------\n",
" | numpy.ndarray.nbytes\n",
" | \n",
" | merge(self, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None)\n",
" | Merge DataFrame objects by performing a database-style join operation by\n",
" | columns or indexes.\n",
" | \n",
" | If joining columns on columns, the DataFrame indexes *will be\n",
" | ignored*. Otherwise if joining indexes on indexes or indexes on a column or\n",
" | columns, the index will be passed on.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | right : DataFrame\n",
" | how : {'left', 'right', 'outer', 'inner'}, default 'inner'\n",
" | * left: use only keys from left frame, similar to a SQL left outer join;\n",
" | preserve key order\n",
" | * right: use only keys from right frame, similar to a SQL right outer join;\n",
" | preserve key order\n",
" | * outer: use union of keys from both frames, similar to a SQL full outer\n",
" | join; sort keys lexicographically\n",
" | * inner: use intersection of keys from both frames, similar to a SQL inner\n",
" | join; preserve the order of the left keys\n",
" | on : label or list\n",
" | Field names to join on. Must be found in both DataFrames. If on is\n",
" | None and not merging on indexes, then it merges on the intersection of\n",
" | the columns by default.\n",
" | left_on : label or list, or array-like\n",
" | Field names to join on in left DataFrame. Can be a vector or list of\n",
" | vectors of the length of the DataFrame to use a particular vector as\n",
" | the join key instead of columns\n",
" | right_on : label or list, or array-like\n",
" | Field names to join on in right DataFrame or vector/list of vectors per\n",
" | left_on docs\n",
" | left_index : boolean, default False\n",
" | Use the index from the left DataFrame as the join key(s). If it is a\n",
" | MultiIndex, the number of keys in the other DataFrame (either the index\n",
" | or a number of columns) must match the number of levels\n",
" | right_index : boolean, default False\n",
" | Use the index from the right DataFrame as the join key. Same caveats as\n",
" | left_index\n",
" | sort : boolean, default False\n",
" | Sort the join keys lexicographically in the result DataFrame. If False,\n",
" | the order of the join keys depends on the join type (how keyword)\n",
" | suffixes : 2-length sequence (tuple, list, ...)\n",
" | Suffix to apply to overlapping column names in the left and right\n",
" | side, respectively\n",
" | copy : boolean, default True\n",
" | If False, do not copy data unnecessarily\n",
" | indicator : boolean or string, default False\n",
" | If True, adds a column to output DataFrame called \"_merge\" with\n",
" | information on the source of each row.\n",
" | If string, column with information on source of each row will be added to\n",
" | output DataFrame, and column will be named value of string.\n",
" | Information column is Categorical-type and takes on a value of \"left_only\"\n",
" | for observations whose merge key only appears in 'left' DataFrame,\n",
" | \"right_only\" for observations whose merge key only appears in 'right'\n",
" | DataFrame, and \"both\" if the observation's merge key is found in both.\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | \n",
" | validate : string, default None\n",
" | If specified, checks if merge is of specified type.\n",
" | \n",
" | * \"one_to_one\" or \"1:1\": check if merge keys are unique in both\n",
" | left and right datasets.\n",
" | * \"one_to_many\" or \"1:m\": check if merge keys are unique in left\n",
" | dataset.\n",
" | * \"many_to_one\" or \"m:1\": check if merge keys are unique in right\n",
" | dataset.\n",
" | * \"many_to_many\" or \"m:m\": allowed, but does not result in checks.\n",
" | \n",
" | .. versionadded:: 0.21.0\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> A >>> B\n",
" | lkey value rkey value\n",
" | 0 foo 1 0 foo 5\n",
" | 1 bar 2 1 bar 6\n",
" | 2 baz 3 2 qux 7\n",
" | 3 foo 4 3 bar 8\n",
" | \n",
" | >>> A.merge(B, left_on='lkey', right_on='rkey', how='outer')\n",
" | lkey value_x rkey value_y\n",
" | 0 foo 1 foo 5\n",
" | 1 foo 4 foo 5\n",
" | 2 bar 2 bar 6\n",
" | 3 bar 2 bar 8\n",
" | 4 baz 3 NaN NaN\n",
" | 5 NaN NaN qux 7\n",
" | \n",
" | Returns\n",
" | -------\n",
" | merged : DataFrame\n",
" | The output type will the be same as 'left', if it is a subclass\n",
" | of DataFrame.\n",
" | \n",
" | See also\n",
" | --------\n",
" | merge_ordered\n",
" | merge_asof\n",
" | \n",
" | min(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)\n",
" | This method returns the minimum of the values in the object.\n",
" | If you want the *index* of the minimum, use ``idxmin``. This is\n",
" | the equivalent of the ``numpy.ndarray`` method ``argmin``.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values when computing the result.\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | min : Series or DataFrame (if level specified)\n",
" | \n",
" | mod(self, other, axis='columns', level=None, fill_value=None)\n",
" | Modulo of dataframe and other, element-wise (binary operator `mod`).\n",
" | \n",
" | Equivalent to ``dataframe % other``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.rmod\n",
" | \n",
" | mode(self, axis=0, numeric_only=False)\n",
" | Gets the mode(s) of each element along the axis selected. Adds a row\n",
" | for each mode per label, fills in gaps with nan.\n",
" | \n",
" | Note that there could be multiple values returned for the selected\n",
" | axis (when more than one item share the maximum frequency), which is\n",
" | the reason why a dataframe is returned. If you want to impute missing\n",
" | values with the mode in a dataframe ``df``, you can just do this:\n",
" | ``df.fillna(df.mode().iloc[0])``\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | * 0 or 'index' : get mode of each column\n",
" | * 1 or 'columns' : get mode of each row\n",
" | numeric_only : boolean, default False\n",
" | if True, only apply to numeric columns\n",
" | \n",
" | Returns\n",
" | -------\n",
" | modes : DataFrame (sorted)\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame({'A': [1, 2, 1, 2, 1, 2, 3]})\n",
" | >>> df.mode()\n",
" | A\n",
" | 0 1\n",
" | 1 2\n",
" | \n",
" | mul(self, other, axis='columns', level=None, fill_value=None)\n",
" | Multiplication of dataframe and other, element-wise (binary operator `mul`).\n",
" | \n",
" | Equivalent to ``dataframe * other``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.rmul\n",
" | \n",
" | multiply = mul(self, other, axis='columns', level=None, fill_value=None)\n",
" | \n",
" | ne(self, other, axis='columns', level=None)\n",
" | Wrapper for flexible comparison methods ne\n",
" | \n",
" | nlargest(self, n, columns, keep='first')\n",
" | Get the rows of a DataFrame sorted by the `n` largest\n",
" | values of `columns`.\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | n : int\n",
" | Number of items to retrieve\n",
" | columns : list or str\n",
" | Column name or names to order by\n",
" | keep : {'first', 'last'}, default 'first'\n",
" | Where there are duplicate values:\n",
" | - ``first`` : take the first occurrence.\n",
" | - ``last`` : take the last occurrence.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | DataFrame\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = DataFrame({'a': [1, 10, 8, 11, -1],\n",
" | ... 'b': list('abdce'),\n",
" | ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})\n",
" | >>> df.nlargest(3, 'a')\n",
" | a b c\n",
" | 3 11 c 3\n",
" | 1 10 b 2\n",
" | 2 8 d NaN\n",
" | \n",
" | notna(self)\n",
" | Return a boolean same-sized object indicating if the values are\n",
" | not NA.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | DataFrame.isna : boolean inverse of notna\n",
" | DataFrame.notnull : alias of notna\n",
" | notna : top-level notna\n",
" | \n",
" | notnull(self)\n",
" | Return a boolean same-sized object indicating if the values are\n",
" | not NA.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | DataFrame.isna : boolean inverse of notna\n",
" | DataFrame.notnull : alias of notna\n",
" | notna : top-level notna\n",
" | \n",
" | nsmallest(self, n, columns, keep='first')\n",
" | Get the rows of a DataFrame sorted by the `n` smallest\n",
" | values of `columns`.\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | n : int\n",
" | Number of items to retrieve\n",
" | columns : list or str\n",
" | Column name or names to order by\n",
" | keep : {'first', 'last'}, default 'first'\n",
" | Where there are duplicate values:\n",
" | - ``first`` : take the first occurrence.\n",
" | - ``last`` : take the last occurrence.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | DataFrame\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = DataFrame({'a': [1, 10, 8, 11, -1],\n",
" | ... 'b': list('abdce'),\n",
" | ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})\n",
" | >>> df.nsmallest(3, 'a')\n",
" | a b c\n",
" | 4 -1 e 4\n",
" | 0 1 a 1\n",
" | 2 8 d NaN\n",
" | \n",
" | nunique(self, axis=0, dropna=True)\n",
" | Return Series with number of distinct observations over requested\n",
" | axis.\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | dropna : boolean, default True\n",
" | Don't include NaN in the counts.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | nunique : Series\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [1, 1, 1]})\n",
" | >>> df.nunique()\n",
" | A 3\n",
" | B 1\n",
" | \n",
" | >>> df.nunique(axis=1)\n",
" | 0 1\n",
" | 1 2\n",
" | 2 2\n",
" | \n",
" | pivot(self, index=None, columns=None, values=None)\n",
" | Reshape data (produce a \"pivot\" table) based on column values. Uses\n",
" | unique values from index / columns to form axes of the resulting\n",
" | DataFrame.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | index : string or object, optional\n",
" | Column name to use to make new frame's index. If None, uses\n",
" | existing index.\n",
" | columns : string or object\n",
" | Column name to use to make new frame's columns\n",
" | values : string or object, optional\n",
" | Column name to use for populating new frame's values. If not\n",
" | specified, all remaining columns will be used and the result will\n",
" | have hierarchically indexed columns\n",
" | \n",
" | Returns\n",
" | -------\n",
" | pivoted : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.pivot_table : generalization of pivot that can handle\n",
" | duplicate values for one index/column pair\n",
" | DataFrame.unstack : pivot based on the index values instead of a\n",
" | column\n",
" | \n",
" | Notes\n",
" | -----\n",
" | For finer-tuned control, see hierarchical indexing documentation along\n",
" | with the related stack/unstack methods\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = pd.DataFrame({'foo': ['one','one','one','two','two','two'],\n",
" | 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],\n",
" | 'baz': [1, 2, 3, 4, 5, 6]})\n",
" | >>> df\n",
" | foo bar baz\n",
" | 0 one A 1\n",
" | 1 one B 2\n",
" | 2 one C 3\n",
" | 3 two A 4\n",
" | 4 two B 5\n",
" | 5 two C 6\n",
" | \n",
" | >>> df.pivot(index='foo', columns='bar', values='baz')\n",
" | A B C\n",
" | one 1 2 3\n",
" | two 4 5 6\n",
" | \n",
" | >>> df.pivot(index='foo', columns='bar')['baz']\n",
" | A B C\n",
" | one 1 2 3\n",
" | two 4 5 6\n",
" | \n",
" | pivot_table(self, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All')\n",
" | Create a spreadsheet-style pivot table as a DataFrame. The levels in\n",
" | the pivot table will be stored in MultiIndex objects (hierarchical\n",
" | indexes) on the index and columns of the result DataFrame\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | values : column to aggregate, optional\n",
" | index : column, Grouper, array, or list of the previous\n",
" | If an array is passed, it must be the same length as the data. The\n",
" | list can contain any of the other types (except list).\n",
" | Keys to group by on the pivot table index. If an array is passed,\n",
" | it is being used as the same manner as column values.\n",
" | columns : column, Grouper, array, or list of the previous\n",
" | If an array is passed, it must be the same length as the data. The\n",
" | list can contain any of the other types (except list).\n",
" | Keys to group by on the pivot table column. If an array is passed,\n",
" | it is being used as the same manner as column values.\n",
" | aggfunc : function or list of functions, default numpy.mean\n",
" | If list of functions passed, the resulting pivot table will have\n",
" | hierarchical columns whose top level are the function names\n",
" | (inferred from the function objects themselves)\n",
" | fill_value : scalar, default None\n",
" | Value to replace missing values with\n",
" | margins : boolean, default False\n",
" | Add all row / columns (e.g. for subtotal / grand totals)\n",
" | dropna : boolean, default True\n",
" | Do not include columns whose entries are all NaN\n",
" | margins_name : string, default 'All'\n",
" | Name of the row / column that will contain the totals\n",
" | when margins is True.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame({\"A\": [\"foo\", \"foo\", \"foo\", \"foo\", \"foo\",\n",
" | ... \"bar\", \"bar\", \"bar\", \"bar\"],\n",
" | ... \"B\": [\"one\", \"one\", \"one\", \"two\", \"two\",\n",
" | ... \"one\", \"one\", \"two\", \"two\"],\n",
" | ... \"C\": [\"small\", \"large\", \"large\", \"small\",\n",
" | ... \"small\", \"large\", \"small\", \"small\",\n",
" | ... \"large\"],\n",
" | ... \"D\": [1, 2, 2, 3, 3, 4, 5, 6, 7]})\n",
" | >>> df\n",
" | A B C D\n",
" | 0 foo one small 1\n",
" | 1 foo one large 2\n",
" | 2 foo one large 2\n",
" | 3 foo two small 3\n",
" | 4 foo two small 3\n",
" | 5 bar one large 4\n",
" | 6 bar one small 5\n",
" | 7 bar two small 6\n",
" | 8 bar two large 7\n",
" | \n",
" | >>> table = pivot_table(df, values='D', index=['A', 'B'],\n",
" | ... columns=['C'], aggfunc=np.sum)\n",
" | >>> table\n",
" | ... # doctest: +NORMALIZE_WHITESPACE\n",
" | C large small\n",
" | A B\n",
" | bar one 4.0 5.0\n",
" | two 7.0 6.0\n",
" | foo one 4.0 1.0\n",
" | two NaN 6.0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | table : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.pivot : pivot without aggregation that can handle\n",
" | non-numeric data\n",
" | \n",
" | pow(self, other, axis='columns', level=None, fill_value=None)\n",
" | Exponential power of dataframe and other, element-wise (binary operator `pow`).\n",
" | \n",
" | Equivalent to ``dataframe ** other``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.rpow\n",
" | \n",
" | prod(self, axis=None, skipna=None, level=None, numeric_only=None, min_count=0, **kwargs)\n",
" | Return the product of the values for the requested axis\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values when computing the result.\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | min_count : int, default 0\n",
" | The required number of valid values to perform the operation. If fewer than\n",
" | ``min_count`` non-NA values are present the result will be NA.\n",
" | \n",
" | .. versionadded :: 0.22.0\n",
" | \n",
" | Added with the default being 1. This means the sum or product\n",
" | of an all-NA or empty series is ``NaN``.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | prod : Series or DataFrame (if level specified)\n",
" | \n",
" | Examples\n",
" | --------\n",
" | By default, the product of an empty or all-NA Series is ``1``\n",
" | \n",
" | >>> pd.Series([]).prod()\n",
" | 1.0\n",
" | \n",
" | This can be controlled with the ``min_count`` parameter\n",
" | \n",
" | >>> pd.Series([]).prod(min_count=1)\n",
" | nan\n",
" | \n",
" | Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and\n",
" | empty series identically.\n",
" | \n",
" | >>> pd.Series([np.nan]).prod()\n",
" | 1.0\n",
" | \n",
" | >>> pd.Series([np.nan]).sum(min_count=1)\n",
" | nan\n",
" | \n",
" | product = prod(self, axis=None, skipna=None, level=None, numeric_only=None, min_count=0, **kwargs)\n",
" | \n",
" | quantile(self, q=0.5, axis=0, numeric_only=True, interpolation='linear')\n",
" | Return values at the given quantile over requested axis, a la\n",
" | numpy.percentile.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | q : float or array-like, default 0.5 (50% quantile)\n",
" | 0 <= q <= 1, the quantile(s) to compute\n",
" | axis : {0, 1, 'index', 'columns'} (default 0)\n",
" | 0 or 'index' for row-wise, 1 or 'columns' for column-wise\n",
" | interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}\n",
" | .. versionadded:: 0.18.0\n",
" | \n",
" | This optional parameter specifies the interpolation method to use,\n",
" | when the desired quantile lies between two data points `i` and `j`:\n",
" | \n",
" | * linear: `i + (j - i) * fraction`, where `fraction` is the\n",
" | fractional part of the index surrounded by `i` and `j`.\n",
" | * lower: `i`.\n",
" | * higher: `j`.\n",
" | * nearest: `i` or `j` whichever is nearest.\n",
" | * midpoint: (`i` + `j`) / 2.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | quantiles : Series or DataFrame\n",
" | \n",
" | - If ``q`` is an array, a DataFrame will be returned where the\n",
" | index is ``q``, the columns are the columns of self, and the\n",
" | values are the quantiles.\n",
" | - If ``q`` is a float, a Series will be returned where the\n",
" | index is the columns of self and the values are the quantiles.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),\n",
" | columns=['a', 'b'])\n",
" | >>> df.quantile(.1)\n",
" | a 1.3\n",
" | b 3.7\n",
" | dtype: float64\n",
" | >>> df.quantile([.1, .5])\n",
" | a b\n",
" | 0.1 1.3 3.7\n",
" | 0.5 2.5 55.0\n",
" | \n",
" | query(self, expr, inplace=False, **kwargs)\n",
" | Query the columns of a frame with a boolean expression.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | expr : string\n",
" | The query string to evaluate. You can refer to variables\n",
" | in the environment by prefixing them with an '@' character like\n",
" | ``@a + b``.\n",
" | inplace : bool\n",
" | Whether the query should modify the data in place or return\n",
" | a modified copy\n",
" | \n",
" | .. versionadded:: 0.18.0\n",
" | \n",
" | kwargs : dict\n",
" | See the documentation for :func:`pandas.eval` for complete details\n",
" | on the keyword arguments accepted by :meth:`DataFrame.query`.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | q : DataFrame\n",
" | \n",
" | Notes\n",
" | -----\n",
" | The result of the evaluation of this expression is first passed to\n",
" | :attr:`DataFrame.loc` and if that fails because of a\n",
" | multidimensional key (e.g., a DataFrame) then the result will be passed\n",
" | to :meth:`DataFrame.__getitem__`.\n",
" | \n",
" | This method uses the top-level :func:`pandas.eval` function to\n",
" | evaluate the passed query.\n",
" | \n",
" | The :meth:`~pandas.DataFrame.query` method uses a slightly\n",
" | modified Python syntax by default. For example, the ``&`` and ``|``\n",
" | (bitwise) operators have the precedence of their boolean cousins,\n",
" | :keyword:`and` and :keyword:`or`. This *is* syntactically valid Python,\n",
" | however the semantics are different.\n",
" | \n",
" | You can change the semantics of the expression by passing the keyword\n",
" | argument ``parser='python'``. This enforces the same semantics as\n",
" | evaluation in Python space. Likewise, you can pass ``engine='python'``\n",
" | to evaluate an expression using Python itself as a backend. This is not\n",
" | recommended as it is inefficient compared to using ``numexpr`` as the\n",
" | engine.\n",
" | \n",
" | The :attr:`DataFrame.index` and\n",
" | :attr:`DataFrame.columns` attributes of the\n",
" | :class:`~pandas.DataFrame` instance are placed in the query namespace\n",
" | by default, which allows you to treat both the index and columns of the\n",
" | frame as a column in the frame.\n",
" | The identifier ``index`` is used for the frame index; you can also\n",
" | use the name of the index to identify it in a query.\n",
" | \n",
" | For further details and examples see the ``query`` documentation in\n",
" | :ref:`indexing `.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.eval\n",
" | DataFrame.eval\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> from numpy.random import randn\n",
" | >>> from pandas import DataFrame\n",
" | >>> df = DataFrame(randn(10, 2), columns=list('ab'))\n",
" | >>> df.query('a > b')\n",
" | >>> df[df.a > df.b] # same result as the previous expression\n",
" | \n",
" | radd(self, other, axis='columns', level=None, fill_value=None)\n",
" | Addition of dataframe and other, element-wise (binary operator `radd`).\n",
" | \n",
" | Equivalent to ``other + dataframe``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.add\n",
" | \n",
" | rdiv = rtruediv(self, other, axis='columns', level=None, fill_value=None)\n",
" | \n",
" | reindex(self, labels=None, index=None, columns=None, axis=None, method=None, copy=True, level=None, fill_value=nan, limit=None, tolerance=None)\n",
" | Conform DataFrame to new index with optional filling logic, placing\n",
" | NA/NaN in locations having no value in the previous index. A new object\n",
" | is produced unless the new index is equivalent to the current one and\n",
" | copy=False\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | labels : array-like, optional\n",
" | New labels / index to conform the axis specified by 'axis' to.\n",
" | index, columns : array-like, optional (should be specified using keywords)\n",
" | New labels / index to conform to. Preferably an Index object to\n",
" | avoid duplicating data\n",
" | axis : int or str, optional\n",
" | Axis to target. Can be either the axis name ('index', 'columns')\n",
" | or number (0, 1).\n",
" | method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional\n",
" | method to use for filling holes in reindexed DataFrame.\n",
" | Please note: this is only applicable to DataFrames/Series with a\n",
" | monotonically increasing/decreasing index.\n",
" | \n",
" | * default: don't fill gaps\n",
" | * pad / ffill: propagate last valid observation forward to next\n",
" | valid\n",
" | * backfill / bfill: use next valid observation to fill gap\n",
" | * nearest: use nearest valid observations to fill gap\n",
" | \n",
" | copy : boolean, default True\n",
" | Return a new object, even if the passed indexes are the same\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | fill_value : scalar, default np.NaN\n",
" | Value to use for missing values. Defaults to NaN, but can be any\n",
" | \"compatible\" value\n",
" | limit : int, default None\n",
" | Maximum number of consecutive elements to forward or backward fill\n",
" | tolerance : optional\n",
" | Maximum distance between original and new labels for inexact\n",
" | matches. The values of the index at the matching locations most\n",
" | satisfy the equation ``abs(index[indexer] - target) <= tolerance``.\n",
" | \n",
" | Tolerance may be a scalar value, which applies the same tolerance\n",
" | to all values, or list-like, which applies variable tolerance per\n",
" | element. List-like includes list, tuple, array, Series, and must be\n",
" | the same size as the index and its dtype must exactly match the\n",
" | index's type.\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | .. versionadded:: 0.21.0 (list-like tolerance)\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | ``DataFrame.reindex`` supports two calling conventions\n",
" | \n",
" | * ``(index=index_labels, columns=column_labels, ...)``\n",
" | * ``(labels, axis={'index', 'columns'}, ...)``\n",
" | \n",
" | We *highly* recommend using keyword arguments to clarify your\n",
" | intent.\n",
" | \n",
" | Create a dataframe with some fictional data.\n",
" | \n",
" | >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']\n",
" | >>> df = pd.DataFrame({\n",
" | ... 'http_status': [200,200,404,404,301],\n",
" | ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]},\n",
" | ... index=index)\n",
" | >>> df\n",
" | http_status response_time\n",
" | Firefox 200 0.04\n",
" | Chrome 200 0.02\n",
" | Safari 404 0.07\n",
" | IE10 404 0.08\n",
" | Konqueror 301 1.00\n",
" | \n",
" | Create a new index and reindex the dataframe. By default\n",
" | values in the new index that do not have corresponding\n",
" | records in the dataframe are assigned ``NaN``.\n",
" | \n",
" | >>> new_index= ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10',\n",
" | ... 'Chrome']\n",
" | >>> df.reindex(new_index)\n",
" | http_status response_time\n",
" | Safari 404.0 0.07\n",
" | Iceweasel NaN NaN\n",
" | Comodo Dragon NaN NaN\n",
" | IE10 404.0 0.08\n",
" | Chrome 200.0 0.02\n",
" | \n",
" | We can fill in the missing values by passing a value to\n",
" | the keyword ``fill_value``. Because the index is not monotonically\n",
" | increasing or decreasing, we cannot use arguments to the keyword\n",
" | ``method`` to fill the ``NaN`` values.\n",
" | \n",
" | >>> df.reindex(new_index, fill_value=0)\n",
" | http_status response_time\n",
" | Safari 404 0.07\n",
" | Iceweasel 0 0.00\n",
" | Comodo Dragon 0 0.00\n",
" | IE10 404 0.08\n",
" | Chrome 200 0.02\n",
" | \n",
" | >>> df.reindex(new_index, fill_value='missing')\n",
" | http_status response_time\n",
" | Safari 404 0.07\n",
" | Iceweasel missing missing\n",
" | Comodo Dragon missing missing\n",
" | IE10 404 0.08\n",
" | Chrome 200 0.02\n",
" | \n",
" | We can also reindex the columns.\n",
" | \n",
" | >>> df.reindex(columns=['http_status', 'user_agent'])\n",
" | http_status user_agent\n",
" | Firefox 200 NaN\n",
" | Chrome 200 NaN\n",
" | Safari 404 NaN\n",
" | IE10 404 NaN\n",
" | Konqueror 301 NaN\n",
" | \n",
" | Or we can use \"axis-style\" keyword arguments\n",
" | \n",
" | >>> df.reindex(['http_status', 'user_agent'], axis=\"columns\")\n",
" | http_status user_agent\n",
" | Firefox 200 NaN\n",
" | Chrome 200 NaN\n",
" | Safari 404 NaN\n",
" | IE10 404 NaN\n",
" | Konqueror 301 NaN\n",
" | \n",
" | To further illustrate the filling functionality in\n",
" | ``reindex``, we will create a dataframe with a\n",
" | monotonically increasing index (for example, a sequence\n",
" | of dates).\n",
" | \n",
" | >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D')\n",
" | >>> df2 = pd.DataFrame({\"prices\": [100, 101, np.nan, 100, 89, 88]},\n",
" | ... index=date_index)\n",
" | >>> df2\n",
" | prices\n",
" | 2010-01-01 100\n",
" | 2010-01-02 101\n",
" | 2010-01-03 NaN\n",
" | 2010-01-04 100\n",
" | 2010-01-05 89\n",
" | 2010-01-06 88\n",
" | \n",
" | Suppose we decide to expand the dataframe to cover a wider\n",
" | date range.\n",
" | \n",
" | >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D')\n",
" | >>> df2.reindex(date_index2)\n",
" | prices\n",
" | 2009-12-29 NaN\n",
" | 2009-12-30 NaN\n",
" | 2009-12-31 NaN\n",
" | 2010-01-01 100\n",
" | 2010-01-02 101\n",
" | 2010-01-03 NaN\n",
" | 2010-01-04 100\n",
" | 2010-01-05 89\n",
" | 2010-01-06 88\n",
" | 2010-01-07 NaN\n",
" | \n",
" | The index entries that did not have a value in the original data frame\n",
" | (for example, '2009-12-29') are by default filled with ``NaN``.\n",
" | If desired, we can fill in the missing values using one of several\n",
" | options.\n",
" | \n",
" | For example, to backpropagate the last valid value to fill the ``NaN``\n",
" | values, pass ``bfill`` as an argument to the ``method`` keyword.\n",
" | \n",
" | >>> df2.reindex(date_index2, method='bfill')\n",
" | prices\n",
" | 2009-12-29 100\n",
" | 2009-12-30 100\n",
" | 2009-12-31 100\n",
" | 2010-01-01 100\n",
" | 2010-01-02 101\n",
" | 2010-01-03 NaN\n",
" | 2010-01-04 100\n",
" | 2010-01-05 89\n",
" | 2010-01-06 88\n",
" | 2010-01-07 NaN\n",
" | \n",
" | Please note that the ``NaN`` value present in the original dataframe\n",
" | (at index value 2010-01-03) will not be filled by any of the\n",
" | value propagation schemes. This is because filling while reindexing\n",
" | does not look at dataframe values, but only compares the original and\n",
" | desired indexes. If you do want to fill in the ``NaN`` values present\n",
" | in the original dataframe, use the ``fillna()`` method.\n",
" | \n",
" | See the :ref:`user guide ` for more.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | reindexed : DataFrame\n",
" | \n",
" | reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, limit=None, fill_value=nan)\n",
" | Conform input object to new index with optional\n",
" | filling logic, placing NA/NaN in locations having no value in the\n",
" | previous index. A new object is produced unless the new index is\n",
" | equivalent to the current one and copy=False\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | labels : array-like\n",
" | New labels / index to conform to. Preferably an Index object to\n",
" | avoid duplicating data\n",
" | axis : {0 or 'index', 1 or 'columns'}\n",
" | method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional\n",
" | Method to use for filling holes in reindexed DataFrame:\n",
" | \n",
" | * default: don't fill gaps\n",
" | * pad / ffill: propagate last valid observation forward to next\n",
" | valid\n",
" | * backfill / bfill: use next valid observation to fill gap\n",
" | * nearest: use nearest valid observations to fill gap\n",
" | \n",
" | copy : boolean, default True\n",
" | Return a new object, even if the passed indexes are the same\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | limit : int, default None\n",
" | Maximum number of consecutive elements to forward or backward fill\n",
" | tolerance : optional\n",
" | Maximum distance between original and new labels for inexact\n",
" | matches. The values of the index at the matching locations most\n",
" | satisfy the equation ``abs(index[indexer] - target) <= tolerance``.\n",
" | \n",
" | Tolerance may be a scalar value, which applies the same tolerance\n",
" | to all values, or list-like, which applies variable tolerance per\n",
" | element. List-like includes list, tuple, array, Series, and must be\n",
" | the same size as the index and its dtype must exactly match the\n",
" | index's type.\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | .. versionadded:: 0.21.0 (list-like tolerance)\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df.reindex_axis(['A', 'B', 'C'], axis=1)\n",
" | \n",
" | See Also\n",
" | --------\n",
" | reindex, reindex_like\n",
" | \n",
" | Returns\n",
" | -------\n",
" | reindexed : DataFrame\n",
" | \n",
" | rename(self, mapper=None, index=None, columns=None, axis=None, copy=True, inplace=False, level=None)\n",
" | Alter axes labels.\n",
" | \n",
" | Function / dict values must be unique (1-to-1). Labels not contained in\n",
" | a dict / Series will be left as-is. Extra labels listed don't throw an\n",
" | error.\n",
" | \n",
" | See the :ref:`user guide ` for more.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | mapper, index, columns : dict-like or function, optional\n",
" | dict-like or functions transformations to apply to\n",
" | that axis' values. Use either ``mapper`` and ``axis`` to\n",
" | specify the axis to target with ``mapper``, or ``index`` and\n",
" | ``columns``.\n",
" | axis : int or str, optional\n",
" | Axis to target with ``mapper``. Can be either the axis name\n",
" | ('index', 'columns') or number (0, 1). The default is 'index'.\n",
" | copy : boolean, default True\n",
" | Also copy underlying data\n",
" | inplace : boolean, default False\n",
" | Whether to return a new %(klass)s. If True then value of copy is\n",
" | ignored.\n",
" | level : int or level name, default None\n",
" | In case of a MultiIndex, only rename labels in the specified\n",
" | level.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | renamed : DataFrame\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.DataFrame.rename_axis\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | ``DataFrame.rename`` supports two calling conventions\n",
" | \n",
" | * ``(index=index_mapper, columns=columns_mapper, ...)``\n",
" | * ``(mapper, axis={'index', 'columns'}, ...)``\n",
" | \n",
" | We *highly* recommend using keyword arguments to clarify your\n",
" | intent.\n",
" | \n",
" | >>> df = pd.DataFrame({\"A\": [1, 2, 3], \"B\": [4, 5, 6]})\n",
" | >>> df.rename(index=str, columns={\"A\": \"a\", \"B\": \"c\"})\n",
" | a c\n",
" | 0 1 4\n",
" | 1 2 5\n",
" | 2 3 6\n",
" | \n",
" | >>> df.rename(index=str, columns={\"A\": \"a\", \"C\": \"c\"})\n",
" | a B\n",
" | 0 1 4\n",
" | 1 2 5\n",
" | 2 3 6\n",
" | \n",
" | Using axis-style parameters\n",
" | \n",
" | >>> df.rename(str.lower, axis='columns')\n",
" | a b\n",
" | 0 1 4\n",
" | 1 2 5\n",
" | 2 3 6\n",
" | \n",
" | >>> df.rename({1: 2, 2: 4}, axis='index')\n",
" | A B\n",
" | 0 1 4\n",
" | 2 2 5\n",
" | 4 3 6\n",
" | \n",
" | reorder_levels(self, order, axis=0)\n",
" | Rearrange index levels using input order.\n",
" | May not drop or duplicate levels\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | order : list of int or list of str\n",
" | List representing new level order. Reference level by number\n",
" | (position) or by key (label).\n",
" | axis : int\n",
" | Where to reorder levels.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | type of caller (new object)\n",
" | \n",
" | reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill='')\n",
" | For DataFrame with multi-level index, return new DataFrame with\n",
" | labeling information in the columns under the index names, defaulting\n",
" | to 'level_0', 'level_1', etc. if any are None. For a standard index,\n",
" | the index name will be used (if set), otherwise a default 'index' or\n",
" | 'level_0' (if 'index' is already taken) will be used.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | level : int, str, tuple, or list, default None\n",
" | Only remove the given levels from the index. Removes all levels by\n",
" | default\n",
" | drop : boolean, default False\n",
" | Do not try to insert index into dataframe columns. This resets\n",
" | the index to the default integer index.\n",
" | inplace : boolean, default False\n",
" | Modify the DataFrame in place (do not create a new object)\n",
" | col_level : int or str, default 0\n",
" | If the columns have multiple levels, determines which level the\n",
" | labels are inserted into. By default it is inserted into the first\n",
" | level.\n",
" | col_fill : object, default ''\n",
" | If the columns have multiple levels, determines how the other\n",
" | levels are named. If None then the index name is repeated.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | resetted : DataFrame\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame([('bird', 389.0),\n",
" | ... ('bird', 24.0),\n",
" | ... ('mammal', 80.5),\n",
" | ... ('mammal', np.nan)],\n",
" | ... index=['falcon', 'parrot', 'lion', 'monkey'],\n",
" | ... columns=('class', 'max_speed'))\n",
" | >>> df\n",
" | class max_speed\n",
" | falcon bird 389.0\n",
" | parrot bird 24.0\n",
" | lion mammal 80.5\n",
" | monkey mammal NaN\n",
" | \n",
" | When we reset the index, the old index is added as a column, and a\n",
" | new sequential index is used:\n",
" | \n",
" | >>> df.reset_index()\n",
" | index class max_speed\n",
" | 0 falcon bird 389.0\n",
" | 1 parrot bird 24.0\n",
" | 2 lion mammal 80.5\n",
" | 3 monkey mammal NaN\n",
" | \n",
" | We can use the `drop` parameter to avoid the old index being added as\n",
" | a column:\n",
" | \n",
" | >>> df.reset_index(drop=True)\n",
" | class max_speed\n",
" | 0 bird 389.0\n",
" | 1 bird 24.0\n",
" | 2 mammal 80.5\n",
" | 3 mammal NaN\n",
" | \n",
" | You can also use `reset_index` with `MultiIndex`.\n",
" | \n",
" | >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'),\n",
" | ... ('bird', 'parrot'),\n",
" | ... ('mammal', 'lion'),\n",
" | ... ('mammal', 'monkey')],\n",
" | ... names=['class', 'name'])\n",
" | >>> columns = pd.MultiIndex.from_tuples([('speed', 'max'),\n",
" | ... ('species', 'type')])\n",
" | >>> df = pd.DataFrame([(389.0, 'fly'),\n",
" | ... ( 24.0, 'fly'),\n",
" | ... ( 80.5, 'run'),\n",
" | ... (np.nan, 'jump')],\n",
" | ... index=index,\n",
" | ... columns=columns)\n",
" | >>> df\n",
" | speed species\n",
" | max type\n",
" | class name\n",
" | bird falcon 389.0 fly\n",
" | parrot 24.0 fly\n",
" | mammal lion 80.5 run\n",
" | monkey NaN jump\n",
" | \n",
" | If the index has multiple levels, we can reset a subset of them:\n",
" | \n",
" | >>> df.reset_index(level='class')\n",
" | class speed species\n",
" | max type\n",
" | name\n",
" | falcon bird 389.0 fly\n",
" | parrot bird 24.0 fly\n",
" | lion mammal 80.5 run\n",
" | monkey mammal NaN jump\n",
" | \n",
" | If we are not dropping the index, by default, it is placed in the top\n",
" | level. We can place it in another level:\n",
" | \n",
" | >>> df.reset_index(level='class', col_level=1)\n",
" | speed species\n",
" | class max type\n",
" | name\n",
" | falcon bird 389.0 fly\n",
" | parrot bird 24.0 fly\n",
" | lion mammal 80.5 run\n",
" | monkey mammal NaN jump\n",
" | \n",
" | When the index is inserted under another level, we can specify under\n",
" | which one with the parameter `col_fill`:\n",
" | \n",
" | >>> df.reset_index(level='class', col_level=1, col_fill='species')\n",
" | species speed species\n",
" | class max type\n",
" | name\n",
" | falcon bird 389.0 fly\n",
" | parrot bird 24.0 fly\n",
" | lion mammal 80.5 run\n",
" | monkey mammal NaN jump\n",
" | \n",
" | If we specify a nonexistent level for `col_fill`, it is created:\n",
" | \n",
" | >>> df.reset_index(level='class', col_level=1, col_fill='genus')\n",
" | genus speed species\n",
" | class max type\n",
" | name\n",
" | falcon bird 389.0 fly\n",
" | parrot bird 24.0 fly\n",
" | lion mammal 80.5 run\n",
" | monkey mammal NaN jump\n",
" | \n",
" | rfloordiv(self, other, axis='columns', level=None, fill_value=None)\n",
" | Integer division of dataframe and other, element-wise (binary operator `rfloordiv`).\n",
" | \n",
" | Equivalent to ``other // dataframe``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.floordiv\n",
" | \n",
" | rmod(self, other, axis='columns', level=None, fill_value=None)\n",
" | Modulo of dataframe and other, element-wise (binary operator `rmod`).\n",
" | \n",
" | Equivalent to ``other % dataframe``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.mod\n",
" | \n",
" | rmul(self, other, axis='columns', level=None, fill_value=None)\n",
" | Multiplication of dataframe and other, element-wise (binary operator `rmul`).\n",
" | \n",
" | Equivalent to ``other * dataframe``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.mul\n",
" | \n",
" | rolling(self, window, min_periods=None, freq=None, center=False, win_type=None, on=None, axis=0, closed=None)\n",
" | Provides rolling window calculations.\n",
" | \n",
" | .. versionadded:: 0.18.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | window : int, or offset\n",
" | Size of the moving window. This is the number of observations used for\n",
" | calculating the statistic. Each window will be a fixed size.\n",
" | \n",
" | If its an offset then this will be the time period of each window. Each\n",
" | window will be a variable sized based on the observations included in\n",
" | the time-period. This is only valid for datetimelike indexes. This is\n",
" | new in 0.19.0\n",
" | min_periods : int, default None\n",
" | Minimum number of observations in window required to have a value\n",
" | (otherwise result is NA). For a window that is specified by an offset,\n",
" | this will default to 1.\n",
" | freq : string or DateOffset object, optional (default None)\n",
" | .. deprecated:: 0.18.0\n",
" | Frequency to conform the data to before computing the statistic.\n",
" | Specified as a frequency string or DateOffset object.\n",
" | center : boolean, default False\n",
" | Set the labels at the center of the window.\n",
" | win_type : string, default None\n",
" | Provide a window type. See the notes below.\n",
" | on : string, optional\n",
" | For a DataFrame, column on which to calculate\n",
" | the rolling window, rather than the index\n",
" | closed : string, default None\n",
" | Make the interval closed on the 'right', 'left', 'both' or\n",
" | 'neither' endpoints.\n",
" | For offset-based windows, it defaults to 'right'.\n",
" | For fixed windows, defaults to 'both'. Remaining cases not implemented\n",
" | for fixed windows.\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | axis : int or string, default 0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | a Window or Rolling sub-classed for the particular operation\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})\n",
" | >>> df\n",
" | B\n",
" | 0 0.0\n",
" | 1 1.0\n",
" | 2 2.0\n",
" | 3 NaN\n",
" | 4 4.0\n",
" | \n",
" | Rolling sum with a window length of 2, using the 'triang'\n",
" | window type.\n",
" | \n",
" | >>> df.rolling(2, win_type='triang').sum()\n",
" | B\n",
" | 0 NaN\n",
" | 1 1.0\n",
" | 2 2.5\n",
" | 3 NaN\n",
" | 4 NaN\n",
" | \n",
" | Rolling sum with a window length of 2, min_periods defaults\n",
" | to the window length.\n",
" | \n",
" | >>> df.rolling(2).sum()\n",
" | B\n",
" | 0 NaN\n",
" | 1 1.0\n",
" | 2 3.0\n",
" | 3 NaN\n",
" | 4 NaN\n",
" | \n",
" | Same as above, but explicity set the min_periods\n",
" | \n",
" | >>> df.rolling(2, min_periods=1).sum()\n",
" | B\n",
" | 0 0.0\n",
" | 1 1.0\n",
" | 2 3.0\n",
" | 3 2.0\n",
" | 4 4.0\n",
" | \n",
" | A ragged (meaning not-a-regular frequency), time-indexed DataFrame\n",
" | \n",
" | >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},\n",
" | ....: index = [pd.Timestamp('20130101 09:00:00'),\n",
" | ....: pd.Timestamp('20130101 09:00:02'),\n",
" | ....: pd.Timestamp('20130101 09:00:03'),\n",
" | ....: pd.Timestamp('20130101 09:00:05'),\n",
" | ....: pd.Timestamp('20130101 09:00:06')])\n",
" | \n",
" | >>> df\n",
" | B\n",
" | 2013-01-01 09:00:00 0.0\n",
" | 2013-01-01 09:00:02 1.0\n",
" | 2013-01-01 09:00:03 2.0\n",
" | 2013-01-01 09:00:05 NaN\n",
" | 2013-01-01 09:00:06 4.0\n",
" | \n",
" | \n",
" | Contrasting to an integer rolling window, this will roll a variable\n",
" | length window corresponding to the time period.\n",
" | The default for min_periods is 1.\n",
" | \n",
" | >>> df.rolling('2s').sum()\n",
" | B\n",
" | 2013-01-01 09:00:00 0.0\n",
" | 2013-01-01 09:00:02 1.0\n",
" | 2013-01-01 09:00:03 3.0\n",
" | 2013-01-01 09:00:05 NaN\n",
" | 2013-01-01 09:00:06 4.0\n",
" | \n",
" | Notes\n",
" | -----\n",
" | By default, the result is set to the right edge of the window. This can be\n",
" | changed to the center of the window by setting ``center=True``.\n",
" | \n",
" | The `freq` keyword is used to conform time series data to a specified\n",
" | frequency by resampling the data. This is done with the default parameters\n",
" | of :meth:`~pandas.Series.resample` (i.e. using the `mean`).\n",
" | \n",
" | To learn more about the offsets & frequency strings, please see `this link\n",
" | `__.\n",
" | \n",
" | The recognized win_types are:\n",
" | \n",
" | * ``boxcar``\n",
" | * ``triang``\n",
" | * ``blackman``\n",
" | * ``hamming``\n",
" | * ``bartlett``\n",
" | * ``parzen``\n",
" | * ``bohman``\n",
" | * ``blackmanharris``\n",
" | * ``nuttall``\n",
" | * ``barthann``\n",
" | * ``kaiser`` (needs beta)\n",
" | * ``gaussian`` (needs std)\n",
" | * ``general_gaussian`` (needs power, width)\n",
" | * ``slepian`` (needs width).\n",
" | \n",
" | If ``win_type=None`` all points are evenly weighted. To learn more about\n",
" | different window types see `scipy.signal window functions\n",
" | `__.\n",
" | \n",
" | round(self, decimals=0, *args, **kwargs)\n",
" | Round a DataFrame to a variable number of decimal places.\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | decimals : int, dict, Series\n",
" | Number of decimal places to round each column to. If an int is\n",
" | given, round each column to the same number of places.\n",
" | Otherwise dict and Series round to variable numbers of places.\n",
" | Column names should be in the keys if `decimals` is a\n",
" | dict-like, or in the index if `decimals` is a Series. Any\n",
" | columns not included in `decimals` will be left as is. Elements\n",
" | of `decimals` which are not columns of the input will be\n",
" | ignored.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame(np.random.random([3, 3]),\n",
" | ... columns=['A', 'B', 'C'], index=['first', 'second', 'third'])\n",
" | >>> df\n",
" | A B C\n",
" | first 0.028208 0.992815 0.173891\n",
" | second 0.038683 0.645646 0.577595\n",
" | third 0.877076 0.149370 0.491027\n",
" | >>> df.round(2)\n",
" | A B C\n",
" | first 0.03 0.99 0.17\n",
" | second 0.04 0.65 0.58\n",
" | third 0.88 0.15 0.49\n",
" | >>> df.round({'A': 1, 'C': 2})\n",
" | A B C\n",
" | first 0.0 0.992815 0.17\n",
" | second 0.0 0.645646 0.58\n",
" | third 0.9 0.149370 0.49\n",
" | >>> decimals = pd.Series([1, 0, 2], index=['A', 'B', 'C'])\n",
" | >>> df.round(decimals)\n",
" | A B C\n",
" | first 0.0 1 0.17\n",
" | second 0.0 1 0.58\n",
" | third 0.9 0 0.49\n",
" | \n",
" | Returns\n",
" | -------\n",
" | DataFrame object\n",
" | \n",
" | See Also\n",
" | --------\n",
" | numpy.around\n",
" | Series.round\n",
" | \n",
" | rpow(self, other, axis='columns', level=None, fill_value=None)\n",
" | Exponential power of dataframe and other, element-wise (binary operator `rpow`).\n",
" | \n",
" | Equivalent to ``other ** dataframe``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.pow\n",
" | \n",
" | rsub(self, other, axis='columns', level=None, fill_value=None)\n",
" | Subtraction of dataframe and other, element-wise (binary operator `rsub`).\n",
" | \n",
" | Equivalent to ``other - dataframe``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.sub\n",
" | \n",
" | rtruediv(self, other, axis='columns', level=None, fill_value=None)\n",
" | Floating division of dataframe and other, element-wise (binary operator `rtruediv`).\n",
" | \n",
" | Equivalent to ``other / dataframe``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.truediv\n",
" | \n",
" | select_dtypes(self, include=None, exclude=None)\n",
" | Return a subset of a DataFrame including/excluding columns based on\n",
" | their ``dtype``.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | include, exclude : scalar or list-like\n",
" | A selection of dtypes or strings to be included/excluded. At least\n",
" | one of these parameters must be supplied.\n",
" | \n",
" | Raises\n",
" | ------\n",
" | ValueError\n",
" | * If both of ``include`` and ``exclude`` are empty\n",
" | * If ``include`` and ``exclude`` have overlapping elements\n",
" | * If any kind of string dtype is passed in.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | subset : DataFrame\n",
" | The subset of the frame including the dtypes in ``include`` and\n",
" | excluding the dtypes in ``exclude``.\n",
" | \n",
" | Notes\n",
" | -----\n",
" | * To select all *numeric* types use the numpy dtype ``numpy.number``\n",
" | * To select strings you must use the ``object`` dtype, but note that\n",
" | this will return *all* object dtype columns\n",
" | * See the `numpy dtype hierarchy\n",
" | `__\n",
" | * To select datetimes, use np.datetime64, 'datetime' or 'datetime64'\n",
" | * To select timedeltas, use np.timedelta64, 'timedelta' or\n",
" | 'timedelta64'\n",
" | * To select Pandas categorical dtypes, use 'category'\n",
" | * To select Pandas datetimetz dtypes, use 'datetimetz' (new in 0.20.0),\n",
" | or a 'datetime64[ns, tz]' string\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame({'a': np.random.randn(6).astype('f4'),\n",
" | ... 'b': [True, False] * 3,\n",
" | ... 'c': [1.0, 2.0] * 3})\n",
" | >>> df\n",
" | a b c\n",
" | 0 0.3962 True 1\n",
" | 1 0.1459 False 2\n",
" | 2 0.2623 True 1\n",
" | 3 0.0764 False 2\n",
" | 4 -0.9703 True 1\n",
" | 5 -1.2094 False 2\n",
" | >>> df.select_dtypes(include='bool')\n",
" | c\n",
" | 0 True\n",
" | 1 False\n",
" | 2 True\n",
" | 3 False\n",
" | 4 True\n",
" | 5 False\n",
" | >>> df.select_dtypes(include=['float64'])\n",
" | c\n",
" | 0 1\n",
" | 1 2\n",
" | 2 1\n",
" | 3 2\n",
" | 4 1\n",
" | 5 2\n",
" | >>> df.select_dtypes(exclude=['floating'])\n",
" | b\n",
" | 0 True\n",
" | 1 False\n",
" | 2 True\n",
" | 3 False\n",
" | 4 True\n",
" | 5 False\n",
" | \n",
" | sem(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs)\n",
" | Return unbiased standard error of the mean over requested axis.\n",
" | \n",
" | Normalized by N-1 by default. This can be changed using the ddof argument\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | ddof : int, default 1\n",
" | degrees of freedom\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | sem : Series or DataFrame (if level specified)\n",
" | \n",
" | set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False)\n",
" | Set the DataFrame index (row labels) using one or more existing\n",
" | columns. By default yields a new object.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | keys : column label or list of column labels / arrays\n",
" | drop : boolean, default True\n",
" | Delete columns to be used as the new index\n",
" | append : boolean, default False\n",
" | Whether to append columns to existing index\n",
" | inplace : boolean, default False\n",
" | Modify the DataFrame in place (do not create a new object)\n",
" | verify_integrity : boolean, default False\n",
" | Check the new index for duplicates. Otherwise defer the check until\n",
" | necessary. Setting to False will improve the performance of this\n",
" | method\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame({'month': [1, 4, 7, 10],\n",
" | ... 'year': [2012, 2014, 2013, 2014],\n",
" | ... 'sale':[55, 40, 84, 31]})\n",
" | month sale year\n",
" | 0 1 55 2012\n",
" | 1 4 40 2014\n",
" | 2 7 84 2013\n",
" | 3 10 31 2014\n",
" | \n",
" | Set the index to become the 'month' column:\n",
" | \n",
" | >>> df.set_index('month')\n",
" | sale year\n",
" | month\n",
" | 1 55 2012\n",
" | 4 40 2014\n",
" | 7 84 2013\n",
" | 10 31 2014\n",
" | \n",
" | Create a multi-index using columns 'year' and 'month':\n",
" | \n",
" | >>> df.set_index(['year', 'month'])\n",
" | sale\n",
" | year month\n",
" | 2012 1 55\n",
" | 2014 4 40\n",
" | 2013 7 84\n",
" | 2014 10 31\n",
" | \n",
" | Create a multi-index using a set of values and a column:\n",
" | \n",
" | >>> df.set_index([[1, 2, 3, 4], 'year'])\n",
" | month sale\n",
" | year\n",
" | 1 2012 1 55\n",
" | 2 2014 4 40\n",
" | 3 2013 7 84\n",
" | 4 2014 10 31\n",
" | \n",
" | Returns\n",
" | -------\n",
" | dataframe : DataFrame\n",
" | \n",
" | set_value(self, index, col, value, takeable=False)\n",
" | Put single value at passed column and index\n",
" | \n",
" | .. deprecated:: 0.21.0\n",
" | \n",
" | Please use .at[] or .iat[] accessors.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | index : row label\n",
" | col : column label\n",
" | value : scalar value\n",
" | takeable : interpret the index/col as indexers, default False\n",
" | \n",
" | Returns\n",
" | -------\n",
" | frame : DataFrame\n",
" | If label pair is contained, will be reference to calling DataFrame,\n",
" | otherwise a new object\n",
" | \n",
" | shift(self, periods=1, freq=None, axis=0)\n",
" | Shift index by desired number of periods with an optional time freq\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | periods : int\n",
" | Number of periods to move, can be positive or negative\n",
" | freq : DateOffset, timedelta, or time rule string, optional\n",
" | Increment to use from the tseries module or time rule (e.g. 'EOM').\n",
" | See Notes.\n",
" | axis : {0 or 'index', 1 or 'columns'}\n",
" | \n",
" | Notes\n",
" | -----\n",
" | If freq is specified then the index values are shifted but the data\n",
" | is not realigned. That is, use freq if you would like to extend the\n",
" | index when shifting and preserve the original data.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | shifted : DataFrame\n",
" | \n",
" | skew(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)\n",
" | Return unbiased skew over requested axis\n",
" | Normalized by N-1\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values when computing the result.\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | skew : Series or DataFrame (if level specified)\n",
" | \n",
" | sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, by=None)\n",
" | Sort object by labels (along an axis)\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : index, columns to direct sorting\n",
" | level : int or level name or list of ints or list of level names\n",
" | if not None, sort on values in specified index level(s)\n",
" | ascending : boolean, default True\n",
" | Sort ascending vs. descending\n",
" | inplace : bool, default False\n",
" | if True, perform operation in-place\n",
" | kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'\n",
" | Choice of sorting algorithm. See also ndarray.np.sort for more\n",
" | information. `mergesort` is the only stable algorithm. For\n",
" | DataFrames, this option is only applied when sorting on a single\n",
" | column or label.\n",
" | na_position : {'first', 'last'}, default 'last'\n",
" | `first` puts NaNs at the beginning, `last` puts NaNs at the end.\n",
" | Not implemented for MultiIndex.\n",
" | sort_remaining : bool, default True\n",
" | if true and sorting by level and index is multilevel, sort by other\n",
" | levels too (in order) after sorting by specified level\n",
" | \n",
" | Returns\n",
" | -------\n",
" | sorted_obj : DataFrame\n",
" | \n",
" | sort_values(self, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last')\n",
" | Sort by the values along either axis\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | by : str or list of str\n",
" | Name or list of names which refer to the axis items.\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | Axis to direct sorting\n",
" | ascending : bool or list of bool, default True\n",
" | Sort ascending vs. descending. Specify list for multiple sort\n",
" | orders. If this is a list of bools, must match the length of\n",
" | the by.\n",
" | inplace : bool, default False\n",
" | if True, perform operation in-place\n",
" | kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'\n",
" | Choice of sorting algorithm. See also ndarray.np.sort for more\n",
" | information. `mergesort` is the only stable algorithm. For\n",
" | DataFrames, this option is only applied when sorting on a single\n",
" | column or label.\n",
" | na_position : {'first', 'last'}, default 'last'\n",
" | `first` puts NaNs at the beginning, `last` puts NaNs at the end\n",
" | \n",
" | Returns\n",
" | -------\n",
" | sorted_obj : DataFrame\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame({\n",
" | ... 'col1' : ['A', 'A', 'B', np.nan, 'D', 'C'],\n",
" | ... 'col2' : [2, 1, 9, 8, 7, 4],\n",
" | ... 'col3': [0, 1, 9, 4, 2, 3],\n",
" | ... })\n",
" | >>> df\n",
" | col1 col2 col3\n",
" | 0 A 2 0\n",
" | 1 A 1 1\n",
" | 2 B 9 9\n",
" | 3 NaN 8 4\n",
" | 4 D 7 2\n",
" | 5 C 4 3\n",
" | \n",
" | Sort by col1\n",
" | \n",
" | >>> df.sort_values(by=['col1'])\n",
" | col1 col2 col3\n",
" | 0 A 2 0\n",
" | 1 A 1 1\n",
" | 2 B 9 9\n",
" | 5 C 4 3\n",
" | 4 D 7 2\n",
" | 3 NaN 8 4\n",
" | \n",
" | Sort by multiple columns\n",
" | \n",
" | >>> df.sort_values(by=['col1', 'col2'])\n",
" | col1 col2 col3\n",
" | 1 A 1 1\n",
" | 0 A 2 0\n",
" | 2 B 9 9\n",
" | 5 C 4 3\n",
" | 4 D 7 2\n",
" | 3 NaN 8 4\n",
" | \n",
" | Sort Descending\n",
" | \n",
" | >>> df.sort_values(by='col1', ascending=False)\n",
" | col1 col2 col3\n",
" | 4 D 7 2\n",
" | 5 C 4 3\n",
" | 2 B 9 9\n",
" | 0 A 2 0\n",
" | 1 A 1 1\n",
" | 3 NaN 8 4\n",
" | \n",
" | Putting NAs first\n",
" | \n",
" | >>> df.sort_values(by='col1', ascending=False, na_position='first')\n",
" | col1 col2 col3\n",
" | 3 NaN 8 4\n",
" | 4 D 7 2\n",
" | 5 C 4 3\n",
" | 2 B 9 9\n",
" | 0 A 2 0\n",
" | 1 A 1 1\n",
" | \n",
" | sortlevel(self, level=0, axis=0, ascending=True, inplace=False, sort_remaining=True)\n",
" | DEPRECATED: use :meth:`DataFrame.sort_index`\n",
" | \n",
" | Sort multilevel index by chosen axis and primary level. Data will be\n",
" | lexicographically sorted by the chosen level followed by the other\n",
" | levels (in order)\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | level : int\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | ascending : boolean, default True\n",
" | inplace : boolean, default False\n",
" | Sort the DataFrame without creating a new instance\n",
" | sort_remaining : boolean, default True\n",
" | Sort by the other levels too.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | sorted : DataFrame\n",
" | \n",
" | See Also\n",
" | --------\n",
" | DataFrame.sort_index(level=...)\n",
" | \n",
" | stack(self, level=-1, dropna=True)\n",
" | Pivot a level of the (possibly hierarchical) column labels, returning a\n",
" | DataFrame (or Series in the case of an object with a single level of\n",
" | column labels) having a hierarchical index with a new inner-most level\n",
" | of row labels.\n",
" | The level involved will automatically get sorted.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | level : int, string, or list of these, default last level\n",
" | Level(s) to stack, can pass level name\n",
" | dropna : boolean, default True\n",
" | Whether to drop rows in the resulting Frame/Series with no valid\n",
" | values\n",
" | \n",
" | Examples\n",
" | ----------\n",
" | >>> s\n",
" | a b\n",
" | one 1. 2.\n",
" | two 3. 4.\n",
" | \n",
" | >>> s.stack()\n",
" | one a 1\n",
" | b 2\n",
" | two a 3\n",
" | b 4\n",
" | \n",
" | Returns\n",
" | -------\n",
" | stacked : DataFrame or Series\n",
" | \n",
" | std(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs)\n",
" | Return sample standard deviation over requested axis.\n",
" | \n",
" | Normalized by N-1 by default. This can be changed using the ddof argument\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | ddof : int, default 1\n",
" | degrees of freedom\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | std : Series or DataFrame (if level specified)\n",
" | \n",
" | sub(self, other, axis='columns', level=None, fill_value=None)\n",
" | Subtraction of dataframe and other, element-wise (binary operator `sub`).\n",
" | \n",
" | Equivalent to ``dataframe - other``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.rsub\n",
" | \n",
" | subtract = sub(self, other, axis='columns', level=None, fill_value=None)\n",
" | \n",
" | sum(self, axis=None, skipna=None, level=None, numeric_only=None, min_count=0, **kwargs)\n",
" | Return the sum of the values for the requested axis\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values when computing the result.\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | min_count : int, default 0\n",
" | The required number of valid values to perform the operation. If fewer than\n",
" | ``min_count`` non-NA values are present the result will be NA.\n",
" | \n",
" | .. versionadded :: 0.22.0\n",
" | \n",
" | Added with the default being 1. This means the sum or product\n",
" | of an all-NA or empty series is ``NaN``.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | sum : Series or DataFrame (if level specified)\n",
" | \n",
" | Examples\n",
" | --------\n",
" | By default, the sum of an empty or all-NA Series is ``0``.\n",
" | \n",
" | >>> pd.Series([]).sum() # min_count=0 is the default\n",
" | 0.0\n",
" | \n",
" | This can be controlled with the ``min_count`` parameter. For example, if\n",
" | you'd like the sum of an empty series to be NaN, pass ``min_count=1``.\n",
" | \n",
" | >>> pd.Series([]).sum(min_count=1)\n",
" | nan\n",
" | \n",
" | Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and\n",
" | empty series identically.\n",
" | \n",
" | >>> pd.Series([np.nan]).sum()\n",
" | 0.0\n",
" | \n",
" | >>> pd.Series([np.nan]).sum(min_count=1)\n",
" | nan\n",
" | \n",
" | swaplevel(self, i=-2, j=-1, axis=0)\n",
" | Swap levels i and j in a MultiIndex on a particular axis\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | i, j : int, string (can be mixed)\n",
" | Level of index to be swapped. Can pass level name as string.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | swapped : type of caller (new object)\n",
" | \n",
" | .. versionchanged:: 0.18.1\n",
" | \n",
" | The indexes ``i`` and ``j`` are now optional, and default to\n",
" | the two innermost levels of the index.\n",
" | \n",
" | to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression=None, quoting=None, quotechar='\"', line_terminator='\\n', chunksize=None, tupleize_cols=None, date_format=None, doublequote=True, escapechar=None, decimal='.')\n",
" | Write DataFrame to a comma-separated values (csv) file\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | path_or_buf : string or file handle, default None\n",
" | File path or object, if None is provided the result is returned as\n",
" | a string.\n",
" | sep : character, default ','\n",
" | Field delimiter for the output file.\n",
" | na_rep : string, default ''\n",
" | Missing data representation\n",
" | float_format : string, default None\n",
" | Format string for floating point numbers\n",
" | columns : sequence, optional\n",
" | Columns to write\n",
" | header : boolean or list of string, default True\n",
" | Write out the column names. If a list of strings is given it is\n",
" | assumed to be aliases for the column names\n",
" | index : boolean, default True\n",
" | Write row names (index)\n",
" | index_label : string or sequence, or False, default None\n",
" | Column label for index column(s) if desired. If None is given, and\n",
" | `header` and `index` are True, then the index names are used. A\n",
" | sequence should be given if the DataFrame uses MultiIndex. If\n",
" | False do not print fields for index names. Use index_label=False\n",
" | for easier importing in R\n",
" | mode : str\n",
" | Python write mode, default 'w'\n",
" | encoding : string, optional\n",
" | A string representing the encoding to use in the output file,\n",
" | defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.\n",
" | compression : string, optional\n",
" | a string representing the compression to use in the output file,\n",
" | allowed values are 'gzip', 'bz2', 'xz',\n",
" | only used when the first argument is a filename\n",
" | line_terminator : string, default ``'\\n'``\n",
" | The newline character or character sequence to use in the output\n",
" | file\n",
" | quoting : optional constant from csv module\n",
" | defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`\n",
" | then floats are converted to strings and thus csv.QUOTE_NONNUMERIC\n",
" | will treat them as non-numeric\n",
" | quotechar : string (length 1), default '\\\"'\n",
" | character used to quote fields\n",
" | doublequote : boolean, default True\n",
" | Control quoting of `quotechar` inside a field\n",
" | escapechar : string (length 1), default None\n",
" | character used to escape `sep` and `quotechar` when appropriate\n",
" | chunksize : int or None\n",
" | rows to write at a time\n",
" | tupleize_cols : boolean, default False\n",
" | .. deprecated:: 0.21.0\n",
" | This argument will be removed and will always write each row\n",
" | of the multi-index as a separate row in the CSV file.\n",
" | \n",
" | Write MultiIndex columns as a list of tuples (if True) or in\n",
" | the new, expanded format, where each MultiIndex column is a row\n",
" | in the CSV (if False).\n",
" | date_format : string, default None\n",
" | Format string for datetime objects\n",
" | decimal: string, default '.'\n",
" | Character recognized as decimal separator. E.g. use ',' for\n",
" | European data\n",
" | \n",
" | to_dict(self, orient='dict', into=)\n",
" | Convert DataFrame to dictionary.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | orient : str {'dict', 'list', 'series', 'split', 'records', 'index'}\n",
" | Determines the type of the values of the dictionary.\n",
" | \n",
" | - dict (default) : dict like {column -> {index -> value}}\n",
" | - list : dict like {column -> [values]}\n",
" | - series : dict like {column -> Series(values)}\n",
" | - split : dict like\n",
" | {index -> [index], columns -> [columns], data -> [values]}\n",
" | - records : list like\n",
" | [{column -> value}, ... , {column -> value}]\n",
" | - index : dict like {index -> {column -> value}}\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | \n",
" | Abbreviations are allowed. `s` indicates `series` and `sp`\n",
" | indicates `split`.\n",
" | \n",
" | into : class, default dict\n",
" | The collections.Mapping subclass used for all Mappings\n",
" | in the return value. Can be the actual class or an empty\n",
" | instance of the mapping type you want. If you want a\n",
" | collections.defaultdict, you must pass it initialized.\n",
" | \n",
" | .. versionadded:: 0.21.0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : collections.Mapping like {column -> {index -> value}}\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame(\n",
" | {'col1': [1, 2], 'col2': [0.5, 0.75]}, index=['a', 'b'])\n",
" | >>> df\n",
" | col1 col2\n",
" | a 1 0.1\n",
" | b 2 0.2\n",
" | >>> df.to_dict()\n",
" | {'col1': {'a': 1, 'b': 2}, 'col2': {'a': 0.5, 'b': 0.75}}\n",
" | \n",
" | You can specify the return orientation.\n",
" | \n",
" | >>> df.to_dict('series')\n",
" | {'col1': a 1\n",
" | b 2\n",
" | Name: col1, dtype: int64, 'col2': a 0.50\n",
" | b 0.75\n",
" | Name: col2, dtype: float64}\n",
" | >>> df.to_dict('split')\n",
" | {'columns': ['col1', 'col2'],\n",
" | 'data': [[1.0, 0.5], [2.0, 0.75]],\n",
" | 'index': ['a', 'b']}\n",
" | >>> df.to_dict('records')\n",
" | [{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}]\n",
" | >>> df.to_dict('index')\n",
" | {'a': {'col1': 1.0, 'col2': 0.5}, 'b': {'col1': 2.0, 'col2': 0.75}}\n",
" | \n",
" | You can also specify the mapping type.\n",
" | \n",
" | >>> from collections import OrderedDict, defaultdict\n",
" | >>> df.to_dict(into=OrderedDict)\n",
" | OrderedDict([('col1', OrderedDict([('a', 1), ('b', 2)])),\n",
" | ('col2', OrderedDict([('a', 0.5), ('b', 0.75)]))])\n",
" | \n",
" | If you want a `defaultdict`, you need to initialize it:\n",
" | \n",
" | >>> dd = defaultdict(list)\n",
" | >>> df.to_dict('records', into=dd)\n",
" | [defaultdict(, {'col2': 0.5, 'col1': 1.0}),\n",
" | defaultdict(, {'col2': 0.75, 'col1': 2.0})]\n",
" | \n",
" | to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep='inf', verbose=True, freeze_panes=None)\n",
" | Write DataFrame to an excel sheet\n",
" | \n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | excel_writer : string or ExcelWriter object\n",
" | File path or existing ExcelWriter\n",
" | sheet_name : string, default 'Sheet1'\n",
" | Name of sheet which will contain DataFrame\n",
" | na_rep : string, default ''\n",
" | Missing data representation\n",
" | float_format : string, default None\n",
" | Format string for floating point numbers\n",
" | columns : sequence, optional\n",
" | Columns to write\n",
" | header : boolean or list of string, default True\n",
" | Write out the column names. If a list of strings is given it is\n",
" | assumed to be aliases for the column names\n",
" | index : boolean, default True\n",
" | Write row names (index)\n",
" | index_label : string or sequence, default None\n",
" | Column label for index column(s) if desired. If None is given, and\n",
" | `header` and `index` are True, then the index names are used. A\n",
" | sequence should be given if the DataFrame uses MultiIndex.\n",
" | startrow :\n",
" | upper left cell row to dump data frame\n",
" | startcol :\n",
" | upper left cell column to dump data frame\n",
" | engine : string, default None\n",
" | write engine to use - you can also set this via the options\n",
" | ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and\n",
" | ``io.excel.xlsm.writer``.\n",
" | merge_cells : boolean, default True\n",
" | Write MultiIndex and Hierarchical Rows as merged cells.\n",
" | encoding: string, default None\n",
" | encoding of the resulting excel file. Only necessary for xlwt,\n",
" | other writers support unicode natively.\n",
" | inf_rep : string, default 'inf'\n",
" | Representation for infinity (there is no native representation for\n",
" | infinity in Excel)\n",
" | freeze_panes : tuple of integer (length 2), default None\n",
" | Specifies the one-based bottommost row and rightmost column that\n",
" | is to be frozen\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | Notes\n",
" | -----\n",
" | If passing an existing ExcelWriter object, then the sheet will be added\n",
" | to the existing workbook. This can be used to save different\n",
" | DataFrames to one workbook:\n",
" | \n",
" | >>> writer = pd.ExcelWriter('output.xlsx')\n",
" | >>> df1.to_excel(writer,'Sheet1')\n",
" | >>> df2.to_excel(writer,'Sheet2')\n",
" | >>> writer.save()\n",
" | \n",
" | For compatibility with to_csv, to_excel serializes lists and dicts to\n",
" | strings before writing.\n",
" | \n",
" | to_feather(self, fname)\n",
" | write out the binary feather-format for DataFrames\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | fname : str\n",
" | string file path\n",
" | \n",
" | to_gbq(self, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None)\n",
" | Write a DataFrame to a Google BigQuery table.\n",
" | \n",
" | The main method a user calls to export pandas DataFrame contents to\n",
" | Google BigQuery table.\n",
" | \n",
" | Google BigQuery API Client Library v2 for Python is used.\n",
" | Documentation is available `here\n",
" | `__\n",
" | \n",
" | Authentication to the Google BigQuery service is via OAuth 2.0.\n",
" | \n",
" | - If \"private_key\" is not provided:\n",
" | \n",
" | By default \"application default credentials\" are used.\n",
" | \n",
" | If default application credentials are not found or are restrictive,\n",
" | user account credentials are used. In this case, you will be asked to\n",
" | grant permissions for product name 'pandas GBQ'.\n",
" | \n",
" | - If \"private_key\" is provided:\n",
" | \n",
" | Service account credentials will be used to authenticate.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | dataframe : DataFrame\n",
" | DataFrame to be written\n",
" | destination_table : string\n",
" | Name of table to be written, in the form 'dataset.tablename'\n",
" | project_id : str\n",
" | Google BigQuery Account project ID.\n",
" | chunksize : int (default 10000)\n",
" | Number of rows to be inserted in each chunk from the dataframe.\n",
" | verbose : boolean (default True)\n",
" | Show percentage complete\n",
" | reauth : boolean (default False)\n",
" | Force Google BigQuery to reauthenticate the user. This is useful\n",
" | if multiple accounts are used.\n",
" | if_exists : {'fail', 'replace', 'append'}, default 'fail'\n",
" | 'fail': If table exists, do nothing.\n",
" | 'replace': If table exists, drop it, recreate it, and insert data.\n",
" | 'append': If table exists, insert data. Create if does not exist.\n",
" | private_key : str (optional)\n",
" | Service account private key in JSON format. Can be file path\n",
" | or string contents. This is useful for remote server\n",
" | authentication (eg. jupyter iPython notebook on remote host)\n",
" | \n",
" | to_html(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, bold_rows=True, classes=None, escape=True, max_rows=None, max_cols=None, show_dimensions=False, notebook=False, decimal='.', border=None)\n",
" | Render a DataFrame as an HTML table.\n",
" | \n",
" | `to_html`-specific options:\n",
" | \n",
" | bold_rows : boolean, default True\n",
" | Make the row labels bold in the output\n",
" | classes : str or list or tuple, default None\n",
" | CSS class(es) to apply to the resulting html table\n",
" | escape : boolean, default True\n",
" | Convert the characters <, >, and & to HTML-safe sequences.=\n",
" | max_rows : int, optional\n",
" | Maximum number of rows to show before truncating. If None, show\n",
" | all.\n",
" | max_cols : int, optional\n",
" | Maximum number of columns to show before truncating. If None, show\n",
" | all.\n",
" | decimal : string, default '.'\n",
" | Character recognized as decimal separator, e.g. ',' in Europe\n",
" | \n",
" | .. versionadded:: 0.18.0\n",
" | border : int\n",
" | A ``border=border`` attribute is included in the opening\n",
" | `` tag. Default ``pd.options.html.border``.\n",
" | \n",
" | .. versionadded:: 0.19.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | buf : StringIO-like, optional\n",
" | buffer to write to\n",
" | columns : sequence, optional\n",
" | the subset of columns to write; default None writes all columns\n",
" | col_space : int, optional\n",
" | the minimum width of each column\n",
" | header : bool, optional\n",
" | whether to print column labels, default True\n",
" | index : bool, optional\n",
" | whether to print index (row) labels, default True\n",
" | na_rep : string, optional\n",
" | string representation of NAN to use, default 'NaN'\n",
" | formatters : list or dict of one-parameter functions, optional\n",
" | formatter functions to apply to columns' elements by position or name,\n",
" | default None. The result of each function must be a unicode string.\n",
" | List must be of length equal to the number of columns.\n",
" | float_format : one-parameter function, optional\n",
" | formatter function to apply to columns' elements if they are floats,\n",
" | default None. The result of this function must be a unicode string.\n",
" | sparsify : bool, optional\n",
" | Set to False for a DataFrame with a hierarchical index to print every\n",
" | multiindex key at each row, default True\n",
" | index_names : bool, optional\n",
" | Prints the names of the indexes, default True\n",
" | line_width : int, optional\n",
" | Width to wrap a line in characters, default no wrap\n",
" | justify : {'left', 'right', 'center', 'justify',\n",
" | 'justify-all', 'start', 'end', 'inherit',\n",
" | 'match-parent', 'initial', 'unset'}, default None\n",
" | How to justify the column labels. If None uses the option from\n",
" | the print configuration (controlled by set_option), 'right' out\n",
" | of the box.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | formatted : string (or unicode, depending on data and options)\n",
" | \n",
" | to_panel(self)\n",
" | Transform long (stacked) format (DataFrame) into wide (3D, Panel)\n",
" | format.\n",
" | \n",
" | Currently the index of the DataFrame must be a 2-level MultiIndex. This\n",
" | may be generalized later\n",
" | \n",
" | Returns\n",
" | -------\n",
" | panel : Panel\n",
" | \n",
" | to_parquet(self, fname, engine='auto', compression='snappy', **kwargs)\n",
" | Write a DataFrame to the binary parquet format.\n",
" | \n",
" | .. versionadded:: 0.21.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | fname : str\n",
" | string file path\n",
" | engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'\n",
" | Parquet reader library to use. If 'auto', then the option\n",
" | 'io.parquet.engine' is used. If 'auto', then the first\n",
" | library to be installed is used.\n",
" | compression : str, optional, default 'snappy'\n",
" | compression method, includes {'gzip', 'snappy', 'brotli'}\n",
" | kwargs\n",
" | Additional keyword arguments passed to the engine\n",
" | \n",
" | to_period(self, freq=None, axis=0, copy=True)\n",
" | Convert DataFrame from DatetimeIndex to PeriodIndex with desired\n",
" | frequency (inferred from index if not passed)\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | freq : string, default\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | The axis to convert (the index by default)\n",
" | copy : boolean, default True\n",
" | If False then underlying input data is not copied\n",
" | \n",
" | Returns\n",
" | -------\n",
" | ts : TimeSeries with PeriodIndex\n",
" | \n",
" | to_records(self, index=True, convert_datetime64=True)\n",
" | Convert DataFrame to record array. Index will be put in the\n",
" | 'index' field of the record array if requested\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | index : boolean, default True\n",
" | Include index in resulting record array, stored in 'index' field\n",
" | convert_datetime64 : boolean, default True\n",
" | Whether to convert the index to datetime.datetime if it is a\n",
" | DatetimeIndex\n",
" | \n",
" | Returns\n",
" | -------\n",
" | y : recarray\n",
" | \n",
" | to_sparse(self, fill_value=None, kind='block')\n",
" | Convert to SparseDataFrame\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | fill_value : float, default NaN\n",
" | kind : {'block', 'integer'}\n",
" | \n",
" | Returns\n",
" | -------\n",
" | y : SparseDataFrame\n",
" | \n",
" | to_stata(self, fname, convert_dates=None, write_index=True, encoding='latin-1', byteorder=None, time_stamp=None, data_label=None, variable_labels=None)\n",
" | A class for writing Stata binary dta files from array-like objects\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | fname : str or buffer\n",
" | String path of file-like object\n",
" | convert_dates : dict\n",
" | Dictionary mapping columns containing datetime types to stata\n",
" | internal format to use when wirting the dates. Options are 'tc',\n",
" | 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either an integer\n",
" | or a name. Datetime columns that do not have a conversion type\n",
" | specified will be converted to 'tc'. Raises NotImplementedError if\n",
" | a datetime column has timezone information\n",
" | write_index : bool\n",
" | Write the index to Stata dataset.\n",
" | encoding : str\n",
" | Default is latin-1. Unicode is not supported\n",
" | byteorder : str\n",
" | Can be \">\", \"<\", \"little\", or \"big\". default is `sys.byteorder`\n",
" | time_stamp : datetime\n",
" | A datetime to use as file creation date. Default is the current\n",
" | time.\n",
" | dataset_label : str\n",
" | A label for the data set. Must be 80 characters or smaller.\n",
" | variable_labels : dict\n",
" | Dictionary containing columns as keys and variable labels as\n",
" | values. Each label must be 80 characters or smaller.\n",
" | \n",
" | .. versionadded:: 0.19.0\n",
" | \n",
" | Raises\n",
" | ------\n",
" | NotImplementedError\n",
" | * If datetimes contain timezone information\n",
" | * Column dtype is not representable in Stata\n",
" | ValueError\n",
" | * Columns listed in convert_dates are noth either datetime64[ns]\n",
" | or datetime.datetime\n",
" | * Column listed in convert_dates is not in DataFrame\n",
" | * Categorical label contains more than 32,000 characters\n",
" | \n",
" | .. versionadded:: 0.19.0\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> writer = StataWriter('./data_file.dta', data)\n",
" | >>> writer.write_file()\n",
" | \n",
" | Or with dates\n",
" | \n",
" | >>> writer = StataWriter('./date_data_file.dta', data, {2 : 'tw'})\n",
" | >>> writer.write_file()\n",
" | \n",
" | to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, line_width=None, max_rows=None, max_cols=None, show_dimensions=False)\n",
" | Render a DataFrame to a console-friendly tabular output.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | buf : StringIO-like, optional\n",
" | buffer to write to\n",
" | columns : sequence, optional\n",
" | the subset of columns to write; default None writes all columns\n",
" | col_space : int, optional\n",
" | the minimum width of each column\n",
" | header : bool, optional\n",
" | Write out the column names. If a list of strings is given, it is assumed to be aliases for the column names\n",
" | index : bool, optional\n",
" | whether to print index (row) labels, default True\n",
" | na_rep : string, optional\n",
" | string representation of NAN to use, default 'NaN'\n",
" | formatters : list or dict of one-parameter functions, optional\n",
" | formatter functions to apply to columns' elements by position or name,\n",
" | default None. The result of each function must be a unicode string.\n",
" | List must be of length equal to the number of columns.\n",
" | float_format : one-parameter function, optional\n",
" | formatter function to apply to columns' elements if they are floats,\n",
" | default None. The result of this function must be a unicode string.\n",
" | sparsify : bool, optional\n",
" | Set to False for a DataFrame with a hierarchical index to print every\n",
" | multiindex key at each row, default True\n",
" | index_names : bool, optional\n",
" | Prints the names of the indexes, default True\n",
" | line_width : int, optional\n",
" | Width to wrap a line in characters, default no wrap\n",
" | justify : {'left', 'right', 'center', 'justify',\n",
" | 'justify-all', 'start', 'end', 'inherit',\n",
" | 'match-parent', 'initial', 'unset'}, default None\n",
" | How to justify the column labels. If None uses the option from\n",
" | the print configuration (controlled by set_option), 'right' out\n",
" | of the box.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | formatted : string (or unicode, depending on data and options)\n",
" | \n",
" | to_timestamp(self, freq=None, how='start', axis=0, copy=True)\n",
" | Cast to DatetimeIndex of timestamps, at *beginning* of period\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | freq : string, default frequency of PeriodIndex\n",
" | Desired frequency\n",
" | how : {'s', 'e', 'start', 'end'}\n",
" | Convention for converting period to timestamp; start of period\n",
" | vs. end\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | The axis to convert (the index by default)\n",
" | copy : boolean, default True\n",
" | If false then underlying input data is not copied\n",
" | \n",
" | Returns\n",
" | -------\n",
" | df : DataFrame with DatetimeIndex\n",
" | \n",
" | transform(self, func, *args, **kwargs)\n",
" | Call function producing a like-indexed NDFrame\n",
" | and return a NDFrame with the transformed values\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | func : callable, string, dictionary, or list of string/callables\n",
" | To apply to column\n",
" | \n",
" | Accepted Combinations are:\n",
" | \n",
" | - string function name\n",
" | - function\n",
" | - list of functions\n",
" | - dict of column names -> functions (or list of functions)\n",
" | \n",
" | Returns\n",
" | -------\n",
" | transformed : NDFrame\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],\n",
" | ... index=pd.date_range('1/1/2000', periods=10))\n",
" | df.iloc[3:7] = np.nan\n",
" | \n",
" | >>> df.transform(lambda x: (x - x.mean()) / x.std())\n",
" | A B C\n",
" | 2000-01-01 0.579457 1.236184 0.123424\n",
" | 2000-01-02 0.370357 -0.605875 -1.231325\n",
" | 2000-01-03 1.455756 -0.277446 0.288967\n",
" | 2000-01-04 NaN NaN NaN\n",
" | 2000-01-05 NaN NaN NaN\n",
" | 2000-01-06 NaN NaN NaN\n",
" | 2000-01-07 NaN NaN NaN\n",
" | 2000-01-08 -0.498658 1.274522 1.642524\n",
" | 2000-01-09 -0.540524 -1.012676 -0.828968\n",
" | 2000-01-10 -1.366388 -0.614710 0.005378\n",
" | \n",
" | See also\n",
" | --------\n",
" | pandas.NDFrame.aggregate\n",
" | pandas.NDFrame.apply\n",
" | \n",
" | transpose(self, *args, **kwargs)\n",
" | Transpose index and columns\n",
" | \n",
" | truediv(self, other, axis='columns', level=None, fill_value=None)\n",
" | Floating division of dataframe and other, element-wise (binary operator `truediv`).\n",
" | \n",
" | Equivalent to ``dataframe / other``, but with support to substitute a fill_value for\n",
" | missing data in one of the inputs.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Series, DataFrame, or constant\n",
" | axis : {0, 1, 'index', 'columns'}\n",
" | For Series input, axis to match Series index on\n",
" | fill_value : None or float value, default None\n",
" | Fill missing (NaN) values with this value. If both DataFrame\n",
" | locations are missing, the result will be missing\n",
" | level : int or name\n",
" | Broadcast across a level, matching Index values on the\n",
" | passed MultiIndex level\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Mismatched indices will be unioned together\n",
" | \n",
" | Returns\n",
" | -------\n",
" | result : DataFrame\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.rtruediv\n",
" | \n",
" | unstack(self, level=-1, fill_value=None)\n",
" | Pivot a level of the (necessarily hierarchical) index labels, returning\n",
" | a DataFrame having a new level of column labels whose inner-most level\n",
" | consists of the pivoted index labels. If the index is not a MultiIndex,\n",
" | the output will be a Series (the analogue of stack when the columns are\n",
" | not a MultiIndex).\n",
" | The level involved will automatically get sorted.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | level : int, string, or list of these, default -1 (last level)\n",
" | Level(s) of index to unstack, can pass level name\n",
" | fill_value : replace NaN with this value if the unstack produces\n",
" | missing values\n",
" | \n",
" | .. versionadded: 0.18.0\n",
" | \n",
" | See also\n",
" | --------\n",
" | DataFrame.pivot : Pivot a table based on column values.\n",
" | DataFrame.stack : Pivot a level of the column labels (inverse operation\n",
" | from `unstack`).\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),\n",
" | ... ('two', 'a'), ('two', 'b')])\n",
" | >>> s = pd.Series(np.arange(1.0, 5.0), index=index)\n",
" | >>> s\n",
" | one a 1.0\n",
" | b 2.0\n",
" | two a 3.0\n",
" | b 4.0\n",
" | dtype: float64\n",
" | \n",
" | >>> s.unstack(level=-1)\n",
" | a b\n",
" | one 1.0 2.0\n",
" | two 3.0 4.0\n",
" | \n",
" | >>> s.unstack(level=0)\n",
" | one two\n",
" | a 1.0 3.0\n",
" | b 2.0 4.0\n",
" | \n",
" | >>> df = s.unstack(level=0)\n",
" | >>> df.unstack()\n",
" | one a 1.0\n",
" | b 2.0\n",
" | two a 3.0\n",
" | b 4.0\n",
" | dtype: float64\n",
" | \n",
" | Returns\n",
" | -------\n",
" | unstacked : DataFrame or Series\n",
" | \n",
" | update(self, other, join='left', overwrite=True, filter_func=None, raise_conflict=False)\n",
" | Modify DataFrame in place using non-NA values from passed\n",
" | DataFrame. Aligns on indices\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : DataFrame, or object coercible into a DataFrame\n",
" | join : {'left'}, default 'left'\n",
" | overwrite : boolean, default True\n",
" | If True then overwrite values for common keys in the calling frame\n",
" | filter_func : callable(1d-array) -> 1d-array, default None\n",
" | Can choose to replace values other than NA. Return True for values\n",
" | that should be updated\n",
" | raise_conflict : boolean\n",
" | If True, will raise an error if the DataFrame and other both\n",
" | contain data in the same place.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame({'A': [1, 2, 3],\n",
" | ... 'B': [400, 500, 600]})\n",
" | >>> new_df = pd.DataFrame({'B': [4, 5, 6],\n",
" | ... 'C': [7, 8, 9]})\n",
" | >>> df.update(new_df)\n",
" | >>> df\n",
" | A B\n",
" | 0 1 4\n",
" | 1 2 5\n",
" | 2 3 6\n",
" | \n",
" | >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],\n",
" | ... 'B': ['x', 'y', 'z']})\n",
" | >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})\n",
" | >>> df.update(new_df)\n",
" | >>> df\n",
" | A B\n",
" | 0 a d\n",
" | 1 b e\n",
" | 2 c f\n",
" | \n",
" | >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],\n",
" | ... 'B': ['x', 'y', 'z']})\n",
" | >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2])\n",
" | >>> df.update(new_column)\n",
" | >>> df\n",
" | A B\n",
" | 0 a d\n",
" | 1 b y\n",
" | 2 c e\n",
" | >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],\n",
" | ... 'B': ['x', 'y', 'z']})\n",
" | >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2])\n",
" | >>> df.update(new_df)\n",
" | >>> df\n",
" | A B\n",
" | 0 a x\n",
" | 1 b d\n",
" | 2 c e\n",
" | \n",
" | If ``other`` contains NaNs the corresponding values are not updated\n",
" | in the original dataframe.\n",
" | \n",
" | >>> df = pd.DataFrame({'A': [1, 2, 3],\n",
" | ... 'B': [400, 500, 600]})\n",
" | >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})\n",
" | >>> df.update(new_df)\n",
" | >>> df\n",
" | A B\n",
" | 0 1 4.0\n",
" | 1 2 500.0\n",
" | 2 3 6.0\n",
" | \n",
" | var(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs)\n",
" | Return unbiased variance over requested axis.\n",
" | \n",
" | Normalized by N-1 by default. This can be changed using the ddof argument\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {index (0), columns (1)}\n",
" | skipna : boolean, default True\n",
" | Exclude NA/null values. If an entire row/column is NA, the result\n",
" | will be NA\n",
" | level : int or level name, default None\n",
" | If the axis is a MultiIndex (hierarchical), count along a\n",
" | particular level, collapsing into a Series\n",
" | ddof : int, default 1\n",
" | degrees of freedom\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean columns. If None, will attempt to use\n",
" | everything, then use only numeric data. Not implemented for Series.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | var : Series or DataFrame (if level specified)\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Class methods defined here:\n",
" | \n",
" | from_csv(path, header=0, sep=',', index_col=0, parse_dates=True, encoding=None, tupleize_cols=None, infer_datetime_format=False) from builtins.type\n",
" | Read CSV file (DEPRECATED, please use :func:`pandas.read_csv`\n",
" | instead).\n",
" | \n",
" | It is preferable to use the more powerful :func:`pandas.read_csv`\n",
" | for most general purposes, but ``from_csv`` makes for an easy\n",
" | roundtrip to and from a file (the exact counterpart of\n",
" | ``to_csv``), especially with a DataFrame of time series data.\n",
" | \n",
" | This method only differs from the preferred :func:`pandas.read_csv`\n",
" | in some defaults:\n",
" | \n",
" | - `index_col` is ``0`` instead of ``None`` (take first column as index\n",
" | by default)\n",
" | - `parse_dates` is ``True`` instead of ``False`` (try parsing the index\n",
" | as datetime by default)\n",
" | \n",
" | So a ``pd.DataFrame.from_csv(path)`` can be replaced by\n",
" | ``pd.read_csv(path, index_col=0, parse_dates=True)``.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | path : string file path or file handle / StringIO\n",
" | header : int, default 0\n",
" | Row to use as header (skip prior rows)\n",
" | sep : string, default ','\n",
" | Field delimiter\n",
" | index_col : int or sequence, default 0\n",
" | Column to use for index. If a sequence is given, a MultiIndex\n",
" | is used. Different default from read_table\n",
" | parse_dates : boolean, default True\n",
" | Parse dates. Different default from read_table\n",
" | tupleize_cols : boolean, default False\n",
" | write multi_index columns as a list of tuples (if True)\n",
" | or new (expanded format) if False)\n",
" | infer_datetime_format: boolean, default False\n",
" | If True and `parse_dates` is True for a column, try to infer the\n",
" | datetime format based on the first datetime string. If the format\n",
" | can be inferred, there often will be a large parsing speed-up.\n",
" | \n",
" | See also\n",
" | --------\n",
" | pandas.read_csv\n",
" | \n",
" | Returns\n",
" | -------\n",
" | y : DataFrame\n",
" | \n",
" | from_dict(data, orient='columns', dtype=None) from builtins.type\n",
" | Construct DataFrame from dict of array-like or dicts\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | data : dict\n",
" | {field : array-like} or {field : dict}\n",
" | orient : {'columns', 'index'}, default 'columns'\n",
" | The \"orientation\" of the data. If the keys of the passed dict\n",
" | should be the columns of the resulting DataFrame, pass 'columns'\n",
" | (default). Otherwise if the keys should be rows, pass 'index'.\n",
" | dtype : dtype, default None\n",
" | Data type to force, otherwise infer\n",
" | \n",
" | Returns\n",
" | -------\n",
" | DataFrame\n",
" | \n",
" | from_items(items, columns=None, orient='columns') from builtins.type\n",
" | Convert (key, value) pairs to DataFrame. The keys will be the axis\n",
" | index (usually the columns, but depends on the specified\n",
" | orientation). The values should be arrays or Series.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | items : sequence of (key, value) pairs\n",
" | Values should be arrays or Series.\n",
" | columns : sequence of column labels, optional\n",
" | Must be passed if orient='index'.\n",
" | orient : {'columns', 'index'}, default 'columns'\n",
" | The \"orientation\" of the data. If the keys of the\n",
" | input correspond to column labels, pass 'columns'\n",
" | (default). Otherwise if the keys correspond to the index,\n",
" | pass 'index'.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | frame : DataFrame\n",
" | \n",
" | from_records(data, index=None, exclude=None, columns=None, coerce_float=False, nrows=None) from builtins.type\n",
" | Convert structured or record ndarray to DataFrame\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | data : ndarray (structured dtype), list of tuples, dict, or DataFrame\n",
" | index : string, list of fields, array-like\n",
" | Field of array to use as the index, alternately a specific set of\n",
" | input labels to use\n",
" | exclude : sequence, default None\n",
" | Columns or fields to exclude\n",
" | columns : sequence, default None\n",
" | Column names to use. If the passed data do not have names\n",
" | associated with them, this argument provides names for the\n",
" | columns. Otherwise this argument indicates the order of the columns\n",
" | in the result (any names not found in the data will become all-NA\n",
" | columns)\n",
" | coerce_float : boolean, default False\n",
" | Attempt to convert values of non-string, non-numeric objects (like\n",
" | decimal.Decimal) to floating point, useful for SQL result sets\n",
" | \n",
" | Returns\n",
" | -------\n",
" | df : DataFrame\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data descriptors defined here:\n",
" | \n",
" | T\n",
" | Transpose index and columns\n",
" | \n",
" | axes\n",
" | Return a list with the row axis labels and column axis labels as the\n",
" | only members. They are returned in that order.\n",
" | \n",
" | columns\n",
" | \n",
" | index\n",
" | \n",
" | shape\n",
" | Return a tuple representing the dimensionality of the DataFrame.\n",
" | \n",
" | style\n",
" | Property returning a Styler object containing methods for\n",
" | building a styled HTML representation fo the DataFrame.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.io.formats.style.Styler\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data and other attributes defined here:\n",
" | \n",
" | plot = \n",
" | DataFrame plotting accessor and method\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df.plot.line()\n",
" | >>> df.plot.scatter('x', 'y')\n",
" | >>> df.plot.hexbin()\n",
" | \n",
" | These plotting methods can also be accessed by calling the accessor as a\n",
" | method with the ``kind`` argument:\n",
" | ``df.plot(kind='line')`` is equivalent to ``df.plot.line()``\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Methods inherited from pandas.core.generic.NDFrame:\n",
" | \n",
" | __abs__(self)\n",
" | \n",
" | __array__(self, dtype=None)\n",
" | \n",
" | __array_wrap__(self, result, context=None)\n",
" | \n",
" | __bool__ = __nonzero__(self)\n",
" | \n",
" | __contains__(self, key)\n",
" | True if the key is in the info axis\n",
" | \n",
" | __copy__(self, deep=True)\n",
" | \n",
" | __deepcopy__(self, memo=None)\n",
" | \n",
" | __delitem__(self, key)\n",
" | Delete item\n",
" | \n",
" | __finalize__(self, other, method=None, **kwargs)\n",
" | Propagate metadata from other to self.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : the object from which to get the attributes that we are going\n",
" | to propagate\n",
" | method : optional, a passed method name ; possibly to take different\n",
" | types of propagation actions based on this\n",
" | \n",
" | __getattr__(self, name)\n",
" | After regular attribute access, try looking up the name\n",
" | This allows simpler access to columns for interactive use.\n",
" | \n",
" | __getstate__(self)\n",
" | \n",
" | __hash__(self)\n",
" | Return hash(self).\n",
" | \n",
" | __invert__(self)\n",
" | \n",
" | __iter__(self)\n",
" | Iterate over infor axis\n",
" | \n",
" | __neg__(self)\n",
" | \n",
" | __nonzero__(self)\n",
" | \n",
" | __round__(self, decimals=0)\n",
" | \n",
" | __setattr__(self, name, value)\n",
" | After regular attribute access, try setting the name\n",
" | This allows simpler access to columns for interactive use.\n",
" | \n",
" | __setstate__(self, state)\n",
" | \n",
" | abs(self)\n",
" | Return an object with absolute value taken--only applicable to objects\n",
" | that are all numeric.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | abs: type of caller\n",
" | \n",
" | add_prefix(self, prefix)\n",
" | Concatenate prefix string with panel items names.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | prefix : string\n",
" | \n",
" | Returns\n",
" | -------\n",
" | with_prefix : type of caller\n",
" | \n",
" | add_suffix(self, suffix)\n",
" | Concatenate suffix string with panel items names.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | suffix : string\n",
" | \n",
" | Returns\n",
" | -------\n",
" | with_suffix : type of caller\n",
" | \n",
" | as_blocks(self, copy=True)\n",
" | Convert the frame to a dict of dtype -> Constructor Types that each has\n",
" | a homogeneous dtype.\n",
" | \n",
" | .. deprecated:: 0.21.0\n",
" | \n",
" | NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in\n",
" | as_matrix)\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | copy : boolean, default True\n",
" | \n",
" | Returns\n",
" | -------\n",
" | values : a dict of dtype -> Constructor Types\n",
" | \n",
" | as_matrix(self, columns=None)\n",
" | Convert the frame to its Numpy-array representation.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | columns: list, optional, default:None\n",
" | If None, return all columns, otherwise, returns specified columns.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | values : ndarray\n",
" | If the caller is heterogeneous and contains booleans or objects,\n",
" | the result will be of dtype=object. See Notes.\n",
" | \n",
" | \n",
" | Notes\n",
" | -----\n",
" | Return is NOT a Numpy-matrix, rather, a Numpy-array.\n",
" | \n",
" | The dtype will be a lower-common-denominator dtype (implicit\n",
" | upcasting); that is to say if the dtypes (even of numeric types)\n",
" | are mixed, the one that accommodates all will be chosen. Use this\n",
" | with care if you are not dealing with the blocks.\n",
" | \n",
" | e.g. If the dtypes are float16 and float32, dtype will be upcast to\n",
" | float32. If dtypes are int32 and uint8, dtype will be upcase to\n",
" | int32. By numpy.find_common_type convention, mixing int64 and uint64\n",
" | will result in a flot64 dtype.\n",
" | \n",
" | This method is provided for backwards compatibility. Generally,\n",
" | it is recommended to use '.values'.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.DataFrame.values\n",
" | \n",
" | asfreq(self, freq, method=None, how=None, normalize=False, fill_value=None)\n",
" | Convert TimeSeries to specified frequency.\n",
" | \n",
" | Optionally provide filling method to pad/backfill missing values.\n",
" | \n",
" | Returns the original data conformed to a new index with the specified\n",
" | frequency. ``resample`` is more appropriate if an operation, such as\n",
" | summarization, is necessary to represent the data at the new frequency.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | freq : DateOffset object, or string\n",
" | method : {'backfill'/'bfill', 'pad'/'ffill'}, default None\n",
" | Method to use for filling holes in reindexed Series (note this\n",
" | does not fill NaNs that already were present):\n",
" | \n",
" | * 'pad' / 'ffill': propagate last valid observation forward to next\n",
" | valid\n",
" | * 'backfill' / 'bfill': use NEXT valid observation to fill\n",
" | how : {'start', 'end'}, default end\n",
" | For PeriodIndex only, see PeriodIndex.asfreq\n",
" | normalize : bool, default False\n",
" | Whether to reset output index to midnight\n",
" | fill_value: scalar, optional\n",
" | Value to use for missing values, applied during upsampling (note\n",
" | this does not fill NaNs that already were present).\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | converted : type of caller\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | Start by creating a series with 4 one minute timestamps.\n",
" | \n",
" | >>> index = pd.date_range('1/1/2000', periods=4, freq='T')\n",
" | >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index)\n",
" | >>> df = pd.DataFrame({'s':series})\n",
" | >>> df\n",
" | s\n",
" | 2000-01-01 00:00:00 0.0\n",
" | 2000-01-01 00:01:00 NaN\n",
" | 2000-01-01 00:02:00 2.0\n",
" | 2000-01-01 00:03:00 3.0\n",
" | \n",
" | Upsample the series into 30 second bins.\n",
" | \n",
" | >>> df.asfreq(freq='30S')\n",
" | s\n",
" | 2000-01-01 00:00:00 0.0\n",
" | 2000-01-01 00:00:30 NaN\n",
" | 2000-01-01 00:01:00 NaN\n",
" | 2000-01-01 00:01:30 NaN\n",
" | 2000-01-01 00:02:00 2.0\n",
" | 2000-01-01 00:02:30 NaN\n",
" | 2000-01-01 00:03:00 3.0\n",
" | \n",
" | Upsample again, providing a ``fill value``.\n",
" | \n",
" | >>> df.asfreq(freq='30S', fill_value=9.0)\n",
" | s\n",
" | 2000-01-01 00:00:00 0.0\n",
" | 2000-01-01 00:00:30 9.0\n",
" | 2000-01-01 00:01:00 NaN\n",
" | 2000-01-01 00:01:30 9.0\n",
" | 2000-01-01 00:02:00 2.0\n",
" | 2000-01-01 00:02:30 9.0\n",
" | 2000-01-01 00:03:00 3.0\n",
" | \n",
" | Upsample again, providing a ``method``.\n",
" | \n",
" | >>> df.asfreq(freq='30S', method='bfill')\n",
" | s\n",
" | 2000-01-01 00:00:00 0.0\n",
" | 2000-01-01 00:00:30 NaN\n",
" | 2000-01-01 00:01:00 NaN\n",
" | 2000-01-01 00:01:30 2.0\n",
" | 2000-01-01 00:02:00 2.0\n",
" | 2000-01-01 00:02:30 3.0\n",
" | 2000-01-01 00:03:00 3.0\n",
" | \n",
" | See Also\n",
" | --------\n",
" | reindex\n",
" | \n",
" | Notes\n",
" | -----\n",
" | To learn more about the frequency strings, please see `this link\n",
" | `__.\n",
" | \n",
" | asof(self, where, subset=None)\n",
" | The last row without any NaN is taken (or the last row without\n",
" | NaN considering only the subset of columns in the case of a DataFrame)\n",
" | \n",
" | .. versionadded:: 0.19.0 For DataFrame\n",
" | \n",
" | If there is no good value, NaN is returned for a Series\n",
" | a Series of NaN values for a DataFrame\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | where : date or array of dates\n",
" | subset : string or list of strings, default None\n",
" | if not None use these columns for NaN propagation\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Dates are assumed to be sorted\n",
" | Raises if this is not the case\n",
" | \n",
" | Returns\n",
" | -------\n",
" | where is scalar\n",
" | \n",
" | - value or NaN if input is Series\n",
" | - Series if input is DataFrame\n",
" | \n",
" | where is Index: same shape object as input\n",
" | \n",
" | See Also\n",
" | --------\n",
" | merge_asof\n",
" | \n",
" | astype(self, dtype, copy=True, errors='raise', **kwargs)\n",
" | Cast a pandas object to a specified dtype ``dtype``.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | dtype : data type, or dict of column name -> data type\n",
" | Use a numpy.dtype or Python type to cast entire pandas object to\n",
" | the same type. Alternatively, use {col: dtype, ...}, where col is a\n",
" | column label and dtype is a numpy.dtype or Python type to cast one\n",
" | or more of the DataFrame's columns to column-specific types.\n",
" | copy : bool, default True.\n",
" | Return a copy when ``copy=True`` (be very careful setting\n",
" | ``copy=False`` as changes to values then may propagate to other\n",
" | pandas objects).\n",
" | errors : {'raise', 'ignore'}, default 'raise'.\n",
" | Control raising of exceptions on invalid data for provided dtype.\n",
" | \n",
" | - ``raise`` : allow exceptions to be raised\n",
" | - ``ignore`` : suppress exceptions. On error return original object\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | raise_on_error : raise on invalid input\n",
" | .. deprecated:: 0.20.0\n",
" | Use ``errors`` instead\n",
" | kwargs : keyword arguments to pass on to the constructor\n",
" | \n",
" | Returns\n",
" | -------\n",
" | casted : type of caller\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> ser = pd.Series([1, 2], dtype='int32')\n",
" | >>> ser\n",
" | 0 1\n",
" | 1 2\n",
" | dtype: int32\n",
" | >>> ser.astype('int64')\n",
" | 0 1\n",
" | 1 2\n",
" | dtype: int64\n",
" | \n",
" | Convert to categorical type:\n",
" | \n",
" | >>> ser.astype('category')\n",
" | 0 1\n",
" | 1 2\n",
" | dtype: category\n",
" | Categories (2, int64): [1, 2]\n",
" | \n",
" | Convert to ordered categorical type with custom ordering:\n",
" | \n",
" | >>> ser.astype('category', ordered=True, categories=[2, 1])\n",
" | 0 1\n",
" | 1 2\n",
" | dtype: category\n",
" | Categories (2, int64): [2 < 1]\n",
" | \n",
" | Note that using ``copy=False`` and changing data on a new\n",
" | pandas object may propagate changes:\n",
" | \n",
" | >>> s1 = pd.Series([1,2])\n",
" | >>> s2 = s1.astype('int', copy=False)\n",
" | >>> s2[0] = 10\n",
" | >>> s1 # note that s1[0] has changed too\n",
" | 0 10\n",
" | 1 2\n",
" | dtype: int64\n",
" | \n",
" | See also\n",
" | --------\n",
" | pandas.to_datetime : Convert argument to datetime.\n",
" | pandas.to_timedelta : Convert argument to timedelta.\n",
" | pandas.to_numeric : Convert argument to a numeric type.\n",
" | numpy.ndarray.astype : Cast a numpy array to a specified type.\n",
" | \n",
" | at_time(self, time, asof=False)\n",
" | Select values at particular time of day (e.g. 9:30AM).\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | time : datetime.time or string\n",
" | \n",
" | Returns\n",
" | -------\n",
" | values_at_time : type of caller\n",
" | \n",
" | between_time(self, start_time, end_time, include_start=True, include_end=True)\n",
" | Select values between particular times of the day (e.g., 9:00-9:30 AM).\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | start_time : datetime.time or string\n",
" | end_time : datetime.time or string\n",
" | include_start : boolean, default True\n",
" | include_end : boolean, default True\n",
" | \n",
" | Returns\n",
" | -------\n",
" | values_between_time : type of caller\n",
" | \n",
" | bfill(self, axis=None, inplace=False, limit=None, downcast=None)\n",
" | Synonym for :meth:`DataFrame.fillna(method='bfill') `\n",
" | \n",
" | bool(self)\n",
" | Return the bool of a single element PandasObject.\n",
" | \n",
" | This must be a boolean scalar value, either True or False. Raise a\n",
" | ValueError if the PandasObject does not have exactly 1 element, or that\n",
" | element is not boolean\n",
" | \n",
" | clip(self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs)\n",
" | Trim values at input threshold(s).\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | lower : float or array_like, default None\n",
" | upper : float or array_like, default None\n",
" | axis : int or string axis name, optional\n",
" | Align object with lower and upper along the given axis.\n",
" | inplace : boolean, default False\n",
" | Whether to perform the operation in place on the data\n",
" | .. versionadded:: 0.21.0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | clipped : Series\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df\n",
" | 0 1\n",
" | 0 0.335232 -1.256177\n",
" | 1 -1.367855 0.746646\n",
" | 2 0.027753 -1.176076\n",
" | 3 0.230930 -0.679613\n",
" | 4 1.261967 0.570967\n",
" | \n",
" | >>> df.clip(-1.0, 0.5)\n",
" | 0 1\n",
" | 0 0.335232 -1.000000\n",
" | 1 -1.000000 0.500000\n",
" | 2 0.027753 -1.000000\n",
" | 3 0.230930 -0.679613\n",
" | 4 0.500000 0.500000\n",
" | \n",
" | >>> t\n",
" | 0 -0.3\n",
" | 1 -0.2\n",
" | 2 -0.1\n",
" | 3 0.0\n",
" | 4 0.1\n",
" | dtype: float64\n",
" | \n",
" | >>> df.clip(t, t + 1, axis=0)\n",
" | 0 1\n",
" | 0 0.335232 -0.300000\n",
" | 1 -0.200000 0.746646\n",
" | 2 0.027753 -0.100000\n",
" | 3 0.230930 0.000000\n",
" | 4 1.100000 0.570967\n",
" | \n",
" | clip_lower(self, threshold, axis=None, inplace=False)\n",
" | Return copy of the input with values below given value(s) truncated.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | threshold : float or array_like\n",
" | axis : int or string axis name, optional\n",
" | Align object with threshold along the given axis.\n",
" | inplace : boolean, default False\n",
" | Whether to perform the operation in place on the data\n",
" | .. versionadded:: 0.21.0\n",
" | \n",
" | See Also\n",
" | --------\n",
" | clip\n",
" | \n",
" | Returns\n",
" | -------\n",
" | clipped : same type as input\n",
" | \n",
" | clip_upper(self, threshold, axis=None, inplace=False)\n",
" | Return copy of input with values above given value(s) truncated.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | threshold : float or array_like\n",
" | axis : int or string axis name, optional\n",
" | Align object with threshold along the given axis.\n",
" | inplace : boolean, default False\n",
" | Whether to perform the operation in place on the data\n",
" | .. versionadded:: 0.21.0\n",
" | \n",
" | See Also\n",
" | --------\n",
" | clip\n",
" | \n",
" | Returns\n",
" | -------\n",
" | clipped : same type as input\n",
" | \n",
" | consolidate(self, inplace=False)\n",
" | DEPRECATED: consolidate will be an internal implementation only.\n",
" | \n",
" | convert_objects(self, convert_dates=True, convert_numeric=False, convert_timedeltas=True, copy=True)\n",
" | Deprecated.\n",
" | Attempt to infer better dtype for object columns\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | convert_dates : boolean, default True\n",
" | If True, convert to date where possible. If 'coerce', force\n",
" | conversion, with unconvertible values becoming NaT.\n",
" | convert_numeric : boolean, default False\n",
" | If True, attempt to coerce to numbers (including strings), with\n",
" | unconvertible values becoming NaN.\n",
" | convert_timedeltas : boolean, default True\n",
" | If True, convert to timedelta where possible. If 'coerce', force\n",
" | conversion, with unconvertible values becoming NaT.\n",
" | copy : boolean, default True\n",
" | If True, return a copy even if no copy is necessary (e.g. no\n",
" | conversion was done). Note: This is meant for internal use, and\n",
" | should not be confused with inplace.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.to_datetime : Convert argument to datetime.\n",
" | pandas.to_timedelta : Convert argument to timedelta.\n",
" | pandas.to_numeric : Return a fixed frequency timedelta index,\n",
" | with day as the default.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | converted : same as input object\n",
" | \n",
" | copy(self, deep=True)\n",
" | Make a copy of this objects data.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | deep : boolean or string, default True\n",
" | Make a deep copy, including a copy of the data and the indices.\n",
" | With ``deep=False`` neither the indices or the data are copied.\n",
" | \n",
" | Note that when ``deep=True`` data is copied, actual python objects\n",
" | will not be copied recursively, only the reference to the object.\n",
" | This is in contrast to ``copy.deepcopy`` in the Standard Library,\n",
" | which recursively copies object data.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | copy : type of caller\n",
" | \n",
" | describe(self, percentiles=None, include=None, exclude=None)\n",
" | Generates descriptive statistics that summarize the central tendency,\n",
" | dispersion and shape of a dataset's distribution, excluding\n",
" | ``NaN`` values.\n",
" | \n",
" | Analyzes both numeric and object series, as well\n",
" | as ``DataFrame`` column sets of mixed data types. The output\n",
" | will vary depending on what is provided. Refer to the notes\n",
" | below for more detail.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | percentiles : list-like of numbers, optional\n",
" | The percentiles to include in the output. All should\n",
" | fall between 0 and 1. The default is\n",
" | ``[.25, .5, .75]``, which returns the 25th, 50th, and\n",
" | 75th percentiles.\n",
" | include : 'all', list-like of dtypes or None (default), optional\n",
" | A white list of data types to include in the result. Ignored\n",
" | for ``Series``. Here are the options:\n",
" | \n",
" | - 'all' : All columns of the input will be included in the output.\n",
" | - A list-like of dtypes : Limits the results to the\n",
" | provided data types.\n",
" | To limit the result to numeric types submit\n",
" | ``numpy.number``. To limit it instead to object columns submit\n",
" | the ``numpy.object`` data type. Strings\n",
" | can also be used in the style of\n",
" | ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To\n",
" | select pandas categorical columns, use ``'category'``\n",
" | - None (default) : The result will include all numeric columns.\n",
" | exclude : list-like of dtypes or None (default), optional,\n",
" | A black list of data types to omit from the result. Ignored\n",
" | for ``Series``. Here are the options:\n",
" | \n",
" | - A list-like of dtypes : Excludes the provided data types\n",
" | from the result. To exclude numeric types submit\n",
" | ``numpy.number``. To exclude object columns submit the data\n",
" | type ``numpy.object``. Strings can also be used in the style of\n",
" | ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To\n",
" | exclude pandas categorical columns, use ``'category'``\n",
" | - None (default) : The result will exclude nothing.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | summary: Series/DataFrame of summary statistics\n",
" | \n",
" | Notes\n",
" | -----\n",
" | For numeric data, the result's index will include ``count``,\n",
" | ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and\n",
" | upper percentiles. By default the lower percentile is ``25`` and the\n",
" | upper percentile is ``75``. The ``50`` percentile is the\n",
" | same as the median.\n",
" | \n",
" | For object data (e.g. strings or timestamps), the result's index\n",
" | will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``\n",
" | is the most common value. The ``freq`` is the most common value's\n",
" | frequency. Timestamps also include the ``first`` and ``last`` items.\n",
" | \n",
" | If multiple object values have the highest count, then the\n",
" | ``count`` and ``top`` results will be arbitrarily chosen from\n",
" | among those with the highest count.\n",
" | \n",
" | For mixed data types provided via a ``DataFrame``, the default is to\n",
" | return only an analysis of numeric columns. If the dataframe consists\n",
" | only of object and categorical data without any numeric columns, the\n",
" | default is to return an analysis of both the object and categorical\n",
" | columns. If ``include='all'`` is provided as an option, the result\n",
" | will include a union of attributes of each type.\n",
" | \n",
" | The `include` and `exclude` parameters can be used to limit\n",
" | which columns in a ``DataFrame`` are analyzed for the output.\n",
" | The parameters are ignored when analyzing a ``Series``.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | Describing a numeric ``Series``.\n",
" | \n",
" | >>> s = pd.Series([1, 2, 3])\n",
" | >>> s.describe()\n",
" | count 3.0\n",
" | mean 2.0\n",
" | std 1.0\n",
" | min 1.0\n",
" | 25% 1.5\n",
" | 50% 2.0\n",
" | 75% 2.5\n",
" | max 3.0\n",
" | \n",
" | Describing a categorical ``Series``.\n",
" | \n",
" | >>> s = pd.Series(['a', 'a', 'b', 'c'])\n",
" | >>> s.describe()\n",
" | count 4\n",
" | unique 3\n",
" | top a\n",
" | freq 2\n",
" | dtype: object\n",
" | \n",
" | Describing a timestamp ``Series``.\n",
" | \n",
" | >>> s = pd.Series([\n",
" | ... np.datetime64(\"2000-01-01\"),\n",
" | ... np.datetime64(\"2010-01-01\"),\n",
" | ... np.datetime64(\"2010-01-01\")\n",
" | ... ])\n",
" | >>> s.describe()\n",
" | count 3\n",
" | unique 2\n",
" | top 2010-01-01 00:00:00\n",
" | freq 2\n",
" | first 2000-01-01 00:00:00\n",
" | last 2010-01-01 00:00:00\n",
" | dtype: object\n",
" | \n",
" | Describing a ``DataFrame``. By default only numeric fields\n",
" | are returned.\n",
" | \n",
" | >>> df = pd.DataFrame({ 'object': ['a', 'b', 'c'],\n",
" | ... 'numeric': [1, 2, 3],\n",
" | ... 'categorical': pd.Categorical(['d','e','f'])\n",
" | ... })\n",
" | >>> df.describe()\n",
" | numeric\n",
" | count 3.0\n",
" | mean 2.0\n",
" | std 1.0\n",
" | min 1.0\n",
" | 25% 1.5\n",
" | 50% 2.0\n",
" | 75% 2.5\n",
" | max 3.0\n",
" | \n",
" | Describing all columns of a ``DataFrame`` regardless of data type.\n",
" | \n",
" | >>> df.describe(include='all')\n",
" | categorical numeric object\n",
" | count 3 3.0 3\n",
" | unique 3 NaN 3\n",
" | top f NaN c\n",
" | freq 1 NaN 1\n",
" | mean NaN 2.0 NaN\n",
" | std NaN 1.0 NaN\n",
" | min NaN 1.0 NaN\n",
" | 25% NaN 1.5 NaN\n",
" | 50% NaN 2.0 NaN\n",
" | 75% NaN 2.5 NaN\n",
" | max NaN 3.0 NaN\n",
" | \n",
" | Describing a column from a ``DataFrame`` by accessing it as\n",
" | an attribute.\n",
" | \n",
" | >>> df.numeric.describe()\n",
" | count 3.0\n",
" | mean 2.0\n",
" | std 1.0\n",
" | min 1.0\n",
" | 25% 1.5\n",
" | 50% 2.0\n",
" | 75% 2.5\n",
" | max 3.0\n",
" | Name: numeric, dtype: float64\n",
" | \n",
" | Including only numeric columns in a ``DataFrame`` description.\n",
" | \n",
" | >>> df.describe(include=[np.number])\n",
" | numeric\n",
" | count 3.0\n",
" | mean 2.0\n",
" | std 1.0\n",
" | min 1.0\n",
" | 25% 1.5\n",
" | 50% 2.0\n",
" | 75% 2.5\n",
" | max 3.0\n",
" | \n",
" | Including only string columns in a ``DataFrame`` description.\n",
" | \n",
" | >>> df.describe(include=[np.object])\n",
" | object\n",
" | count 3\n",
" | unique 3\n",
" | top c\n",
" | freq 1\n",
" | \n",
" | Including only categorical columns from a ``DataFrame`` description.\n",
" | \n",
" | >>> df.describe(include=['category'])\n",
" | categorical\n",
" | count 3\n",
" | unique 3\n",
" | top f\n",
" | freq 1\n",
" | \n",
" | Excluding numeric columns from a ``DataFrame`` description.\n",
" | \n",
" | >>> df.describe(exclude=[np.number])\n",
" | categorical object\n",
" | count 3 3\n",
" | unique 3 3\n",
" | top f c\n",
" | freq 1 1\n",
" | \n",
" | Excluding object columns from a ``DataFrame`` description.\n",
" | \n",
" | >>> df.describe(exclude=[np.object])\n",
" | categorical numeric\n",
" | count 3 3.0\n",
" | unique 3 NaN\n",
" | top f NaN\n",
" | freq 1 NaN\n",
" | mean NaN 2.0\n",
" | std NaN 1.0\n",
" | min NaN 1.0\n",
" | 25% NaN 1.5\n",
" | 50% NaN 2.0\n",
" | 75% NaN 2.5\n",
" | max NaN 3.0\n",
" | \n",
" | See Also\n",
" | --------\n",
" | DataFrame.count\n",
" | DataFrame.max\n",
" | DataFrame.min\n",
" | DataFrame.mean\n",
" | DataFrame.std\n",
" | DataFrame.select_dtypes\n",
" | \n",
" | drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise')\n",
" | Return new object with labels in requested axis removed.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | labels : single label or list-like\n",
" | Index or column labels to drop.\n",
" | axis : int or axis name\n",
" | Whether to drop labels from the index (0 / 'index') or\n",
" | columns (1 / 'columns').\n",
" | index, columns : single label or list-like\n",
" | Alternative to specifying `axis` (``labels, axis=1`` is\n",
" | equivalent to ``columns=labels``).\n",
" | \n",
" | .. versionadded:: 0.21.0\n",
" | level : int or level name, default None\n",
" | For MultiIndex\n",
" | inplace : bool, default False\n",
" | If True, do operation inplace and return None.\n",
" | errors : {'ignore', 'raise'}, default 'raise'\n",
" | If 'ignore', suppress error and existing labels are dropped.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | dropped : type of caller\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame(np.arange(12).reshape(3,4),\n",
" | columns=['A', 'B', 'C', 'D'])\n",
" | >>> df\n",
" | A B C D\n",
" | 0 0 1 2 3\n",
" | 1 4 5 6 7\n",
" | 2 8 9 10 11\n",
" | \n",
" | Drop columns\n",
" | \n",
" | >>> df.drop(['B', 'C'], axis=1)\n",
" | A D\n",
" | 0 0 3\n",
" | 1 4 7\n",
" | 2 8 11\n",
" | \n",
" | >>> df.drop(columns=['B', 'C'])\n",
" | A D\n",
" | 0 0 3\n",
" | 1 4 7\n",
" | 2 8 11\n",
" | \n",
" | Drop a row by index\n",
" | \n",
" | >>> df.drop([0, 1])\n",
" | A B C D\n",
" | 2 8 9 10 11\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Specifying both `labels` and `index` or `columns` will raise a\n",
" | ValueError.\n",
" | \n",
" | equals(self, other)\n",
" | Determines if two NDFrame objects contain the same elements. NaNs in\n",
" | the same location are considered equal.\n",
" | \n",
" | ffill(self, axis=None, inplace=False, limit=None, downcast=None)\n",
" | Synonym for :meth:`DataFrame.fillna(method='ffill') `\n",
" | \n",
" | filter(self, items=None, like=None, regex=None, axis=None)\n",
" | Subset rows or columns of dataframe according to labels in\n",
" | the specified index.\n",
" | \n",
" | Note that this routine does not filter a dataframe on its\n",
" | contents. The filter is applied to the labels of the index.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | items : list-like\n",
" | List of info axis to restrict to (must not all be present)\n",
" | like : string\n",
" | Keep info axis where \"arg in col == True\"\n",
" | regex : string (regular expression)\n",
" | Keep info axis with re.search(regex, col) == True\n",
" | axis : int or string axis name\n",
" | The axis to filter on. By default this is the info axis,\n",
" | 'index' for Series, 'columns' for DataFrame\n",
" | \n",
" | Returns\n",
" | -------\n",
" | same type as input object\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df\n",
" | one two three\n",
" | mouse 1 2 3\n",
" | rabbit 4 5 6\n",
" | \n",
" | >>> # select columns by name\n",
" | >>> df.filter(items=['one', 'three'])\n",
" | one three\n",
" | mouse 1 3\n",
" | rabbit 4 6\n",
" | \n",
" | >>> # select columns by regular expression\n",
" | >>> df.filter(regex='e$', axis=1)\n",
" | one three\n",
" | mouse 1 3\n",
" | rabbit 4 6\n",
" | \n",
" | >>> # select rows containing 'bbi'\n",
" | >>> df.filter(like='bbi', axis=0)\n",
" | one two three\n",
" | rabbit 4 5 6\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.DataFrame.loc\n",
" | \n",
" | Notes\n",
" | -----\n",
" | The ``items``, ``like``, and ``regex`` parameters are\n",
" | enforced to be mutually exclusive.\n",
" | \n",
" | ``axis`` defaults to the info axis that is used when indexing\n",
" | with ``[]``.\n",
" | \n",
" | first(self, offset)\n",
" | Convenience method for subsetting initial periods of time series data\n",
" | based on a date offset.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | offset : string, DateOffset, dateutil.relativedelta\n",
" | \n",
" | Examples\n",
" | --------\n",
" | ts.first('10D') -> First 10 days\n",
" | \n",
" | Returns\n",
" | -------\n",
" | subset : type of caller\n",
" | \n",
" | get(self, key, default=None)\n",
" | Get item from object for given key (DataFrame column, Panel slice,\n",
" | etc.). Returns default value if not found.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | key : object\n",
" | \n",
" | Returns\n",
" | -------\n",
" | value : type of items contained in object\n",
" | \n",
" | get_dtype_counts(self)\n",
" | Return the counts of dtypes in this object.\n",
" | \n",
" | get_ftype_counts(self)\n",
" | Return the counts of ftypes in this object.\n",
" | \n",
" | get_values(self)\n",
" | same as values (but handles sparseness conversions)\n",
" | \n",
" | groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, **kwargs)\n",
" | Group series using mapper (dict or key function, apply given function\n",
" | to group, return result as series) or by a series of columns.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | by : mapping, function, str, or iterable\n",
" | Used to determine the groups for the groupby.\n",
" | If ``by`` is a function, it's called on each value of the object's\n",
" | index. If a dict or Series is passed, the Series or dict VALUES\n",
" | will be used to determine the groups (the Series' values are first\n",
" | aligned; see ``.align()`` method). If an ndarray is passed, the\n",
" | values are used as-is determine the groups. A str or list of strs\n",
" | may be passed to group by the columns in ``self``\n",
" | axis : int, default 0\n",
" | level : int, level name, or sequence of such, default None\n",
" | If the axis is a MultiIndex (hierarchical), group by a particular\n",
" | level or levels\n",
" | as_index : boolean, default True\n",
" | For aggregated output, return object with group labels as the\n",
" | index. Only relevant for DataFrame input. as_index=False is\n",
" | effectively \"SQL-style\" grouped output\n",
" | sort : boolean, default True\n",
" | Sort group keys. Get better performance by turning this off.\n",
" | Note this does not influence the order of observations within each\n",
" | group. groupby preserves the order of rows within each group.\n",
" | group_keys : boolean, default True\n",
" | When calling apply, add group keys to index to identify pieces\n",
" | squeeze : boolean, default False\n",
" | reduce the dimensionality of the return type if possible,\n",
" | otherwise return a consistent type\n",
" | \n",
" | Examples\n",
" | --------\n",
" | DataFrame results\n",
" | \n",
" | >>> data.groupby(func, axis=0).mean()\n",
" | >>> data.groupby(['col1', 'col2'])['col3'].mean()\n",
" | \n",
" | DataFrame with hierarchical index\n",
" | \n",
" | >>> data.groupby(['col1', 'col2']).mean()\n",
" | \n",
" | Returns\n",
" | -------\n",
" | GroupBy object\n",
" | \n",
" | head(self, n=5)\n",
" | Return the first n rows.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | n : int, default 5\n",
" | Number of rows to select.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | obj_head : type of caller\n",
" | The first n rows of the caller object.\n",
" | \n",
" | infer_objects(self)\n",
" | Attempt to infer better dtypes for object columns.\n",
" | \n",
" | Attempts soft conversion of object-dtyped\n",
" | columns, leaving non-object and unconvertible\n",
" | columns unchanged. The inference rules are the\n",
" | same as during normal Series/DataFrame construction.\n",
" | \n",
" | .. versionadded:: 0.21.0\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.to_datetime : Convert argument to datetime.\n",
" | pandas.to_timedelta : Convert argument to timedelta.\n",
" | pandas.to_numeric : Convert argument to numeric typeR\n",
" | \n",
" | Returns\n",
" | -------\n",
" | converted : same type as input object\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame({\"A\": [\"a\", 1, 2, 3]})\n",
" | >>> df = df.iloc[1:]\n",
" | >>> df\n",
" | A\n",
" | 1 1\n",
" | 2 2\n",
" | 3 3\n",
" | \n",
" | >>> df.dtypes\n",
" | A object\n",
" | dtype: object\n",
" | \n",
" | >>> df.infer_objects().dtypes\n",
" | A int64\n",
" | dtype: object\n",
" | \n",
" | interpolate(self, method='linear', axis=0, limit=None, inplace=False, limit_direction='forward', downcast=None, **kwargs)\n",
" | Interpolate values according to different methods.\n",
" | \n",
" | Please note that only ``method='linear'`` is supported for\n",
" | DataFrames/Series with a MultiIndex.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | method : {'linear', 'time', 'index', 'values', 'nearest', 'zero',\n",
" | 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',\n",
" | 'polynomial', 'spline', 'piecewise_polynomial',\n",
" | 'from_derivatives', 'pchip', 'akima'}\n",
" | \n",
" | * 'linear': ignore the index and treat the values as equally\n",
" | spaced. This is the only method supported on MultiIndexes.\n",
" | default\n",
" | * 'time': interpolation works on daily and higher resolution\n",
" | data to interpolate given length of interval\n",
" | * 'index', 'values': use the actual numerical values of the index\n",
" | * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',\n",
" | 'barycentric', 'polynomial' is passed to\n",
" | ``scipy.interpolate.interp1d``. Both 'polynomial' and 'spline'\n",
" | require that you also specify an `order` (int),\n",
" | e.g. df.interpolate(method='polynomial', order=4).\n",
" | These use the actual numerical values of the index.\n",
" | * 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima'\n",
" | are all wrappers around the scipy interpolation methods of\n",
" | similar names. These use the actual numerical values of the\n",
" | index. For more information on their behavior, see the\n",
" | `scipy documentation\n",
" | `__\n",
" | and `tutorial documentation\n",
" | `__\n",
" | * 'from_derivatives' refers to BPoly.from_derivatives which\n",
" | replaces 'piecewise_polynomial' interpolation method in\n",
" | scipy 0.18\n",
" | \n",
" | .. versionadded:: 0.18.1\n",
" | \n",
" | Added support for the 'akima' method\n",
" | Added interpolate method 'from_derivatives' which replaces\n",
" | 'piecewise_polynomial' in scipy 0.18; backwards-compatible with\n",
" | scipy < 0.18\n",
" | \n",
" | axis : {0, 1}, default 0\n",
" | * 0: fill column-by-column\n",
" | * 1: fill row-by-row\n",
" | limit : int, default None.\n",
" | Maximum number of consecutive NaNs to fill. Must be greater than 0.\n",
" | limit_direction : {'forward', 'backward', 'both'}, default 'forward'\n",
" | If limit is specified, consecutive NaNs will be filled in this\n",
" | direction.\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | \n",
" | inplace : bool, default False\n",
" | Update the NDFrame in place if possible.\n",
" | downcast : optional, 'infer' or None, defaults to None\n",
" | Downcast dtypes if possible.\n",
" | kwargs : keyword arguments to pass on to the interpolating function.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | Series or DataFrame of same shape interpolated at the NaNs\n",
" | \n",
" | See Also\n",
" | --------\n",
" | reindex, replace, fillna\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | Filling in NaNs\n",
" | \n",
" | >>> s = pd.Series([0, 1, np.nan, 3])\n",
" | >>> s.interpolate()\n",
" | 0 0\n",
" | 1 1\n",
" | 2 2\n",
" | 3 3\n",
" | dtype: float64\n",
" | \n",
" | keys(self)\n",
" | Get the 'info axis' (see Indexing for more)\n",
" | \n",
" | This is index for Series, columns for DataFrame and major_axis for\n",
" | Panel.\n",
" | \n",
" | last(self, offset)\n",
" | Convenience method for subsetting final periods of time series data\n",
" | based on a date offset.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | offset : string, DateOffset, dateutil.relativedelta\n",
" | \n",
" | Examples\n",
" | --------\n",
" | ts.last('5M') -> Last 5 months\n",
" | \n",
" | Returns\n",
" | -------\n",
" | subset : type of caller\n",
" | \n",
" | mask(self, cond, other=nan, inplace=False, axis=None, level=None, errors='raise', try_cast=False, raise_on_error=None)\n",
" | Return an object of same shape as self and whose corresponding\n",
" | entries are from self where `cond` is False and otherwise are from\n",
" | `other`.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | cond : boolean NDFrame, array-like, or callable\n",
" | Where `cond` is False, keep the original value. Where\n",
" | True, replace with corresponding value from `other`.\n",
" | If `cond` is callable, it is computed on the NDFrame and\n",
" | should return boolean NDFrame or array. The callable must\n",
" | not change input NDFrame (though pandas doesn't check it).\n",
" | \n",
" | .. versionadded:: 0.18.1\n",
" | A callable can be used as cond.\n",
" | \n",
" | other : scalar, NDFrame, or callable\n",
" | Entries where `cond` is True are replaced with\n",
" | corresponding value from `other`.\n",
" | If other is callable, it is computed on the NDFrame and\n",
" | should return scalar or NDFrame. The callable must not\n",
" | change input NDFrame (though pandas doesn't check it).\n",
" | \n",
" | .. versionadded:: 0.18.1\n",
" | A callable can be used as other.\n",
" | \n",
" | inplace : boolean, default False\n",
" | Whether to perform the operation in place on the data\n",
" | axis : alignment axis if needed, default None\n",
" | level : alignment level if needed, default None\n",
" | errors : str, {'raise', 'ignore'}, default 'raise'\n",
" | - ``raise`` : allow exceptions to be raised\n",
" | - ``ignore`` : suppress exceptions. On error return original object\n",
" | \n",
" | Note that currently this parameter won't affect\n",
" | the results and will always coerce to a suitable dtype.\n",
" | \n",
" | try_cast : boolean, default False\n",
" | try to cast the result back to the input type (if possible),\n",
" | raise_on_error : boolean, default True\n",
" | Whether to raise on invalid data types (e.g. trying to where on\n",
" | strings)\n",
" | \n",
" | .. deprecated:: 0.21.0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | wh : same type as caller\n",
" | \n",
" | Notes\n",
" | -----\n",
" | The mask method is an application of the if-then idiom. For each\n",
" | element in the calling DataFrame, if ``cond`` is ``False`` the\n",
" | element is used; otherwise the corresponding element from the DataFrame\n",
" | ``other`` is used.\n",
" | \n",
" | The signature for :func:`DataFrame.where` differs from\n",
" | :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to\n",
" | ``np.where(m, df1, df2)``.\n",
" | \n",
" | For further details and examples see the ``mask`` documentation in\n",
" | :ref:`indexing `.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> s = pd.Series(range(5))\n",
" | >>> s.where(s > 0)\n",
" | 0 NaN\n",
" | 1 1.0\n",
" | 2 2.0\n",
" | 3 3.0\n",
" | 4 4.0\n",
" | \n",
" | >>> s.mask(s > 0)\n",
" | 0 0.0\n",
" | 1 NaN\n",
" | 2 NaN\n",
" | 3 NaN\n",
" | 4 NaN\n",
" | \n",
" | >>> s.where(s > 1, 10)\n",
" | 0 10.0\n",
" | 1 10.0\n",
" | 2 2.0\n",
" | 3 3.0\n",
" | 4 4.0\n",
" | \n",
" | >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])\n",
" | >>> m = df % 3 == 0\n",
" | >>> df.where(m, -df)\n",
" | A B\n",
" | 0 0 -1\n",
" | 1 -2 3\n",
" | 2 -4 -5\n",
" | 3 6 -7\n",
" | 4 -8 9\n",
" | >>> df.where(m, -df) == np.where(m, df, -df)\n",
" | A B\n",
" | 0 True True\n",
" | 1 True True\n",
" | 2 True True\n",
" | 3 True True\n",
" | 4 True True\n",
" | >>> df.where(m, -df) == df.mask(~m, -df)\n",
" | A B\n",
" | 0 True True\n",
" | 1 True True\n",
" | 2 True True\n",
" | 3 True True\n",
" | 4 True True\n",
" | \n",
" | See Also\n",
" | --------\n",
" | :func:`DataFrame.where`\n",
" | \n",
" | pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, **kwargs)\n",
" | Percent change over given number of periods.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | periods : int, default 1\n",
" | Periods to shift for forming percent change\n",
" | fill_method : str, default 'pad'\n",
" | How to handle NAs before computing percent changes\n",
" | limit : int, default None\n",
" | The number of consecutive NAs to fill before stopping\n",
" | freq : DateOffset, timedelta, or offset alias string, optional\n",
" | Increment to use from time series API (e.g. 'M' or BDay())\n",
" | \n",
" | Returns\n",
" | -------\n",
" | chg : NDFrame\n",
" | \n",
" | Notes\n",
" | -----\n",
" | \n",
" | By default, the percentage change is calculated along the stat\n",
" | axis: 0, or ``Index``, for ``DataFrame`` and 1, or ``minor`` for\n",
" | ``Panel``. You can change this with the ``axis`` keyword argument.\n",
" | \n",
" | pipe(self, func, *args, **kwargs)\n",
" | Apply func(self, \\*args, \\*\\*kwargs)\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | func : function\n",
" | function to apply to the NDFrame.\n",
" | ``args``, and ``kwargs`` are passed into ``func``.\n",
" | Alternatively a ``(callable, data_keyword)`` tuple where\n",
" | ``data_keyword`` is a string indicating the keyword of\n",
" | ``callable`` that expects the NDFrame.\n",
" | args : iterable, optional\n",
" | positional arguments passed into ``func``.\n",
" | kwargs : mapping, optional\n",
" | a dictionary of keyword arguments passed into ``func``.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | object : the return type of ``func``.\n",
" | \n",
" | Notes\n",
" | -----\n",
" | \n",
" | Use ``.pipe`` when chaining together functions that expect\n",
" | Series, DataFrames or GroupBy objects. Instead of writing\n",
" | \n",
" | >>> f(g(h(df), arg1=a), arg2=b, arg3=c)\n",
" | \n",
" | You can write\n",
" | \n",
" | >>> (df.pipe(h)\n",
" | ... .pipe(g, arg1=a)\n",
" | ... .pipe(f, arg2=b, arg3=c)\n",
" | ... )\n",
" | \n",
" | If you have a function that takes the data as (say) the second\n",
" | argument, pass a tuple indicating which keyword expects the\n",
" | data. For example, suppose ``f`` takes its data as ``arg2``:\n",
" | \n",
" | >>> (df.pipe(h)\n",
" | ... .pipe(g, arg1=a)\n",
" | ... .pipe((f, 'arg2'), arg1=a, arg3=c)\n",
" | ... )\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.DataFrame.apply\n",
" | pandas.DataFrame.applymap\n",
" | pandas.Series.map\n",
" | \n",
" | pop(self, item)\n",
" | Return item and drop from frame. Raise KeyError if not found.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | item : str\n",
" | Column label to be popped\n",
" | \n",
" | Returns\n",
" | -------\n",
" | popped : Series\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame([('falcon', 'bird', 389.0),\n",
" | ... ('parrot', 'bird', 24.0),\n",
" | ... ('lion', 'mammal', 80.5),\n",
" | ... ('monkey', 'mammal', np.nan)],\n",
" | ... columns=('name', 'class', 'max_speed'))\n",
" | >>> df\n",
" | name class max_speed\n",
" | 0 falcon bird 389.0\n",
" | 1 parrot bird 24.0\n",
" | 2 lion mammal 80.5\n",
" | 3 monkey mammal NaN\n",
" | \n",
" | >>> df.pop('class')\n",
" | 0 bird\n",
" | 1 bird\n",
" | 2 mammal\n",
" | 3 mammal\n",
" | Name: class, dtype: object\n",
" | \n",
" | >>> df\n",
" | name max_speed\n",
" | 0 falcon 389.0\n",
" | 1 parrot 24.0\n",
" | 2 lion 80.5\n",
" | 3 monkey NaN\n",
" | \n",
" | rank(self, axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False)\n",
" | Compute numerical data ranks (1 through n) along axis. Equal values are\n",
" | assigned a rank that is the average of the ranks of those values\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : {0 or 'index', 1 or 'columns'}, default 0\n",
" | index to direct ranking\n",
" | method : {'average', 'min', 'max', 'first', 'dense'}\n",
" | * average: average rank of group\n",
" | * min: lowest rank in group\n",
" | * max: highest rank in group\n",
" | * first: ranks assigned in order they appear in the array\n",
" | * dense: like 'min', but rank always increases by 1 between groups\n",
" | numeric_only : boolean, default None\n",
" | Include only float, int, boolean data. Valid only for DataFrame or\n",
" | Panel objects\n",
" | na_option : {'keep', 'top', 'bottom'}\n",
" | * keep: leave NA values where they are\n",
" | * top: smallest rank if ascending\n",
" | * bottom: smallest rank if descending\n",
" | ascending : boolean, default True\n",
" | False for ranks by high (1) to low (N)\n",
" | pct : boolean, default False\n",
" | Computes percentage rank of data\n",
" | \n",
" | Returns\n",
" | -------\n",
" | ranks : same type as caller\n",
" | \n",
" | reindex_like(self, other, method=None, copy=True, limit=None, tolerance=None)\n",
" | Return an object with matching indices to myself.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | other : Object\n",
" | method : string or None\n",
" | copy : boolean, default True\n",
" | limit : int, default None\n",
" | Maximum number of consecutive labels to fill for inexact matches.\n",
" | tolerance : optional\n",
" | Maximum distance between labels of the other object and this\n",
" | object for inexact matches. Can be list-like.\n",
" | \n",
" | .. versionadded:: 0.17.0\n",
" | .. versionadded:: 0.21.0 (list-like tolerance)\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Like calling s.reindex(index=other.index, columns=other.columns,\n",
" | method=...)\n",
" | \n",
" | Returns\n",
" | -------\n",
" | reindexed : same as input\n",
" | \n",
" | rename_axis(self, mapper, axis=0, copy=True, inplace=False)\n",
" | Alter the name of the index or columns.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | mapper : scalar, list-like, optional\n",
" | Value to set the axis name attribute.\n",
" | axis : int or string, default 0\n",
" | copy : boolean, default True\n",
" | Also copy underlying data\n",
" | inplace : boolean, default False\n",
" | \n",
" | Returns\n",
" | -------\n",
" | renamed : type of caller or None if inplace=True\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Prior to version 0.21.0, ``rename_axis`` could also be used to change\n",
" | the axis *labels* by passing a mapping or scalar. This behavior is\n",
" | deprecated and will be removed in a future version. Use ``rename``\n",
" | instead.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.Series.rename, pandas.DataFrame.rename\n",
" | pandas.Index.rename\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = pd.DataFrame({\"A\": [1, 2, 3], \"B\": [4, 5, 6]})\n",
" | >>> df.rename_axis(\"foo\")\n",
" | A B\n",
" | foo\n",
" | 0 1 4\n",
" | 1 2 5\n",
" | 2 3 6\n",
" | \n",
" | >>> df.rename_axis(\"bar\", axis=\"columns\")\n",
" | bar A B\n",
" | 0 1 4\n",
" | 1 2 5\n",
" | 2 3 6\n",
" | \n",
" | replace(self, to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad', axis=None)\n",
" | Replace values given in 'to_replace' with 'value'.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | to_replace : str, regex, list, dict, Series, numeric, or None\n",
" | \n",
" | * str or regex:\n",
" | \n",
" | - str: string exactly matching `to_replace` will be replaced\n",
" | with `value`\n",
" | - regex: regexs matching `to_replace` will be replaced with\n",
" | `value`\n",
" | \n",
" | * list of str, regex, or numeric:\n",
" | \n",
" | - First, if `to_replace` and `value` are both lists, they\n",
" | **must** be the same length.\n",
" | - Second, if ``regex=True`` then all of the strings in **both**\n",
" | lists will be interpreted as regexs otherwise they will match\n",
" | directly. This doesn't matter much for `value` since there\n",
" | are only a few possible substitution regexes you can use.\n",
" | - str and regex rules apply as above.\n",
" | \n",
" | * dict:\n",
" | \n",
" | - Nested dictionaries, e.g., {'a': {'b': nan}}, are read as\n",
" | follows: look in column 'a' for the value 'b' and replace it\n",
" | with nan. You can nest regular expressions as well. Note that\n",
" | column names (the top-level dictionary keys in a nested\n",
" | dictionary) **cannot** be regular expressions.\n",
" | - Keys map to column names and values map to substitution\n",
" | values. You can treat this as a special case of passing two\n",
" | lists except that you are specifying the column to search in.\n",
" | \n",
" | * None:\n",
" | \n",
" | - This means that the ``regex`` argument must be a string,\n",
" | compiled regular expression, or list, dict, ndarray or Series\n",
" | of such elements. If `value` is also ``None`` then this\n",
" | **must** be a nested dictionary or ``Series``.\n",
" | \n",
" | See the examples section for examples of each of these.\n",
" | value : scalar, dict, list, str, regex, default None\n",
" | Value to use to fill holes (e.g. 0), alternately a dict of values\n",
" | specifying which value to use for each column (columns not in the\n",
" | dict will not be filled). Regular expressions, strings and lists or\n",
" | dicts of such objects are also allowed.\n",
" | inplace : boolean, default False\n",
" | If True, in place. Note: this will modify any\n",
" | other views on this object (e.g. a column from a DataFrame).\n",
" | Returns the caller if this is True.\n",
" | limit : int, default None\n",
" | Maximum size gap to forward or backward fill\n",
" | regex : bool or same types as `to_replace`, default False\n",
" | Whether to interpret `to_replace` and/or `value` as regular\n",
" | expressions. If this is ``True`` then `to_replace` *must* be a\n",
" | string. Otherwise, `to_replace` must be ``None`` because this\n",
" | parameter will be interpreted as a regular expression or a list,\n",
" | dict, or array of regular expressions.\n",
" | method : string, optional, {'pad', 'ffill', 'bfill'}\n",
" | The method to use when for replacement, when ``to_replace`` is a\n",
" | ``list``.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | NDFrame.reindex\n",
" | NDFrame.asfreq\n",
" | NDFrame.fillna\n",
" | \n",
" | Returns\n",
" | -------\n",
" | filled : NDFrame\n",
" | \n",
" | Raises\n",
" | ------\n",
" | AssertionError\n",
" | * If `regex` is not a ``bool`` and `to_replace` is not ``None``.\n",
" | TypeError\n",
" | * If `to_replace` is a ``dict`` and `value` is not a ``list``,\n",
" | ``dict``, ``ndarray``, or ``Series``\n",
" | * If `to_replace` is ``None`` and `regex` is not compilable into a\n",
" | regular expression or is a list, dict, ndarray, or Series.\n",
" | ValueError\n",
" | * If `to_replace` and `value` are ``list`` s or ``ndarray`` s, but\n",
" | they are not the same length.\n",
" | \n",
" | Notes\n",
" | -----\n",
" | * Regex substitution is performed under the hood with ``re.sub``. The\n",
" | rules for substitution for ``re.sub`` are the same.\n",
" | * Regular expressions will only substitute on strings, meaning you\n",
" | cannot provide, for example, a regular expression matching floating\n",
" | point numbers and expect the columns in your frame that have a\n",
" | numeric dtype to be matched. However, if those floating point numbers\n",
" | *are* strings, then you can do this.\n",
" | * This method has *a lot* of options. You are encouraged to experiment\n",
" | and play with this method to gain intuition about how it works.\n",
" | \n",
" | resample(self, rule, how=None, axis=0, fill_method=None, closed=None, label=None, convention='start', kind=None, loffset=None, limit=None, base=0, on=None, level=None)\n",
" | Convenience method for frequency conversion and resampling of time\n",
" | series. Object must have a datetime-like index (DatetimeIndex,\n",
" | PeriodIndex, or TimedeltaIndex), or pass datetime-like values\n",
" | to the on or level keyword.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | rule : string\n",
" | the offset string or object representing target conversion\n",
" | axis : int, optional, default 0\n",
" | closed : {'right', 'left'}\n",
" | Which side of bin interval is closed. The default is 'left'\n",
" | for all frequency offsets except for 'M', 'A', 'Q', 'BM',\n",
" | 'BA', 'BQ', and 'W' which all have a default of 'right'.\n",
" | label : {'right', 'left'}\n",
" | Which bin edge label to label bucket with. The default is 'left'\n",
" | for all frequency offsets except for 'M', 'A', 'Q', 'BM',\n",
" | 'BA', 'BQ', and 'W' which all have a default of 'right'.\n",
" | convention : {'start', 'end', 's', 'e'}\n",
" | For PeriodIndex only, controls whether to use the start or end of\n",
" | `rule`\n",
" | loffset : timedelta\n",
" | Adjust the resampled time labels\n",
" | base : int, default 0\n",
" | For frequencies that evenly subdivide 1 day, the \"origin\" of the\n",
" | aggregated intervals. For example, for '5min' frequency, base could\n",
" | range from 0 through 4. Defaults to 0\n",
" | on : string, optional\n",
" | For a DataFrame, column to use instead of index for resampling.\n",
" | Column must be datetime-like.\n",
" | \n",
" | .. versionadded:: 0.19.0\n",
" | \n",
" | level : string or int, optional\n",
" | For a MultiIndex, level (name or number) to use for\n",
" | resampling. Level must be datetime-like.\n",
" | \n",
" | .. versionadded:: 0.19.0\n",
" | \n",
" | Notes\n",
" | -----\n",
" | To learn more about the offset strings, please see `this link\n",
" | `__.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | Start by creating a series with 9 one minute timestamps.\n",
" | \n",
" | >>> index = pd.date_range('1/1/2000', periods=9, freq='T')\n",
" | >>> series = pd.Series(range(9), index=index)\n",
" | >>> series\n",
" | 2000-01-01 00:00:00 0\n",
" | 2000-01-01 00:01:00 1\n",
" | 2000-01-01 00:02:00 2\n",
" | 2000-01-01 00:03:00 3\n",
" | 2000-01-01 00:04:00 4\n",
" | 2000-01-01 00:05:00 5\n",
" | 2000-01-01 00:06:00 6\n",
" | 2000-01-01 00:07:00 7\n",
" | 2000-01-01 00:08:00 8\n",
" | Freq: T, dtype: int64\n",
" | \n",
" | Downsample the series into 3 minute bins and sum the values\n",
" | of the timestamps falling into a bin.\n",
" | \n",
" | >>> series.resample('3T').sum()\n",
" | 2000-01-01 00:00:00 3\n",
" | 2000-01-01 00:03:00 12\n",
" | 2000-01-01 00:06:00 21\n",
" | Freq: 3T, dtype: int64\n",
" | \n",
" | Downsample the series into 3 minute bins as above, but label each\n",
" | bin using the right edge instead of the left. Please note that the\n",
" | value in the bucket used as the label is not included in the bucket,\n",
" | which it labels. For example, in the original series the\n",
" | bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed\n",
" | value in the resampled bucket with the label ``2000-01-01 00:03:00``\n",
" | does not include 3 (if it did, the summed value would be 6, not 3).\n",
" | To include this value close the right side of the bin interval as\n",
" | illustrated in the example below this one.\n",
" | \n",
" | >>> series.resample('3T', label='right').sum()\n",
" | 2000-01-01 00:03:00 3\n",
" | 2000-01-01 00:06:00 12\n",
" | 2000-01-01 00:09:00 21\n",
" | Freq: 3T, dtype: int64\n",
" | \n",
" | Downsample the series into 3 minute bins as above, but close the right\n",
" | side of the bin interval.\n",
" | \n",
" | >>> series.resample('3T', label='right', closed='right').sum()\n",
" | 2000-01-01 00:00:00 0\n",
" | 2000-01-01 00:03:00 6\n",
" | 2000-01-01 00:06:00 15\n",
" | 2000-01-01 00:09:00 15\n",
" | Freq: 3T, dtype: int64\n",
" | \n",
" | Upsample the series into 30 second bins.\n",
" | \n",
" | >>> series.resample('30S').asfreq()[0:5] #select first 5 rows\n",
" | 2000-01-01 00:00:00 0.0\n",
" | 2000-01-01 00:00:30 NaN\n",
" | 2000-01-01 00:01:00 1.0\n",
" | 2000-01-01 00:01:30 NaN\n",
" | 2000-01-01 00:02:00 2.0\n",
" | Freq: 30S, dtype: float64\n",
" | \n",
" | Upsample the series into 30 second bins and fill the ``NaN``\n",
" | values using the ``pad`` method.\n",
" | \n",
" | >>> series.resample('30S').pad()[0:5]\n",
" | 2000-01-01 00:00:00 0\n",
" | 2000-01-01 00:00:30 0\n",
" | 2000-01-01 00:01:00 1\n",
" | 2000-01-01 00:01:30 1\n",
" | 2000-01-01 00:02:00 2\n",
" | Freq: 30S, dtype: int64\n",
" | \n",
" | Upsample the series into 30 second bins and fill the\n",
" | ``NaN`` values using the ``bfill`` method.\n",
" | \n",
" | >>> series.resample('30S').bfill()[0:5]\n",
" | 2000-01-01 00:00:00 0\n",
" | 2000-01-01 00:00:30 1\n",
" | 2000-01-01 00:01:00 1\n",
" | 2000-01-01 00:01:30 2\n",
" | 2000-01-01 00:02:00 2\n",
" | Freq: 30S, dtype: int64\n",
" | \n",
" | Pass a custom function via ``apply``\n",
" | \n",
" | >>> def custom_resampler(array_like):\n",
" | ... return np.sum(array_like)+5\n",
" | \n",
" | >>> series.resample('3T').apply(custom_resampler)\n",
" | 2000-01-01 00:00:00 8\n",
" | 2000-01-01 00:03:00 17\n",
" | 2000-01-01 00:06:00 26\n",
" | Freq: 3T, dtype: int64\n",
" | \n",
" | For a Series with a PeriodIndex, the keyword `convention` can be\n",
" | used to control whether to use the start or end of `rule`.\n",
" | \n",
" | >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01',\n",
" | freq='A',\n",
" | periods=2))\n",
" | >>> s\n",
" | 2012 1\n",
" | 2013 2\n",
" | Freq: A-DEC, dtype: int64\n",
" | \n",
" | Resample by month using 'start' `convention`. Values are assigned to\n",
" | the first month of the period.\n",
" | \n",
" | >>> s.resample('M', convention='start').asfreq().head()\n",
" | 2012-01 1.0\n",
" | 2012-02 NaN\n",
" | 2012-03 NaN\n",
" | 2012-04 NaN\n",
" | 2012-05 NaN\n",
" | Freq: M, dtype: float64\n",
" | \n",
" | Resample by month using 'end' `convention`. Values are assigned to\n",
" | the last month of the period.\n",
" | \n",
" | >>> s.resample('M', convention='end').asfreq()\n",
" | 2012-12 1.0\n",
" | 2013-01 NaN\n",
" | 2013-02 NaN\n",
" | 2013-03 NaN\n",
" | 2013-04 NaN\n",
" | 2013-05 NaN\n",
" | 2013-06 NaN\n",
" | 2013-07 NaN\n",
" | 2013-08 NaN\n",
" | 2013-09 NaN\n",
" | 2013-10 NaN\n",
" | 2013-11 NaN\n",
" | 2013-12 2.0\n",
" | Freq: M, dtype: float64\n",
" | \n",
" | For DataFrame objects, the keyword ``on`` can be used to specify the\n",
" | column instead of the index for resampling.\n",
" | \n",
" | >>> df = pd.DataFrame(data=9*[range(4)], columns=['a', 'b', 'c', 'd'])\n",
" | >>> df['time'] = pd.date_range('1/1/2000', periods=9, freq='T')\n",
" | >>> df.resample('3T', on='time').sum()\n",
" | a b c d\n",
" | time\n",
" | 2000-01-01 00:00:00 0 3 6 9\n",
" | 2000-01-01 00:03:00 0 3 6 9\n",
" | 2000-01-01 00:06:00 0 3 6 9\n",
" | \n",
" | For a DataFrame with MultiIndex, the keyword ``level`` can be used to\n",
" | specify on level the resampling needs to take place.\n",
" | \n",
" | >>> time = pd.date_range('1/1/2000', periods=5, freq='T')\n",
" | >>> df2 = pd.DataFrame(data=10*[range(4)],\n",
" | columns=['a', 'b', 'c', 'd'],\n",
" | index=pd.MultiIndex.from_product([time, [1, 2]])\n",
" | )\n",
" | >>> df2.resample('3T', level=0).sum()\n",
" | a b c d\n",
" | 2000-01-01 00:00:00 0 6 12 18\n",
" | 2000-01-01 00:03:00 0 4 8 12\n",
" | \n",
" | sample(self, n=None, frac=None, replace=False, weights=None, random_state=None, axis=None)\n",
" | Returns a random sample of items from an axis of object.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | n : int, optional\n",
" | Number of items from axis to return. Cannot be used with `frac`.\n",
" | Default = 1 if `frac` = None.\n",
" | frac : float, optional\n",
" | Fraction of axis items to return. Cannot be used with `n`.\n",
" | replace : boolean, optional\n",
" | Sample with or without replacement. Default = False.\n",
" | weights : str or ndarray-like, optional\n",
" | Default 'None' results in equal probability weighting.\n",
" | If passed a Series, will align with target object on index. Index\n",
" | values in weights not found in sampled object will be ignored and\n",
" | index values in sampled object not in weights will be assigned\n",
" | weights of zero.\n",
" | If called on a DataFrame, will accept the name of a column\n",
" | when axis = 0.\n",
" | Unless weights are a Series, weights must be same length as axis\n",
" | being sampled.\n",
" | If weights do not sum to 1, they will be normalized to sum to 1.\n",
" | Missing values in the weights column will be treated as zero.\n",
" | inf and -inf values not allowed.\n",
" | random_state : int or numpy.random.RandomState, optional\n",
" | Seed for the random number generator (if int), or numpy RandomState\n",
" | object.\n",
" | axis : int or string, optional\n",
" | Axis to sample. Accepts axis number or name. Default is stat axis\n",
" | for given data type (0 for Series and DataFrames, 1 for Panels).\n",
" | \n",
" | Returns\n",
" | -------\n",
" | A new object of same type as caller.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | Generate an example ``Series`` and ``DataFrame``:\n",
" | \n",
" | >>> s = pd.Series(np.random.randn(50))\n",
" | >>> s.head()\n",
" | 0 -0.038497\n",
" | 1 1.820773\n",
" | 2 -0.972766\n",
" | 3 -1.598270\n",
" | 4 -1.095526\n",
" | dtype: float64\n",
" | >>> df = pd.DataFrame(np.random.randn(50, 4), columns=list('ABCD'))\n",
" | >>> df.head()\n",
" | A B C D\n",
" | 0 0.016443 -2.318952 -0.566372 -1.028078\n",
" | 1 -1.051921 0.438836 0.658280 -0.175797\n",
" | 2 -1.243569 -0.364626 -0.215065 0.057736\n",
" | 3 1.768216 0.404512 -0.385604 -1.457834\n",
" | 4 1.072446 -1.137172 0.314194 -0.046661\n",
" | \n",
" | Next extract a random sample from both of these objects...\n",
" | \n",
" | 3 random elements from the ``Series``:\n",
" | \n",
" | >>> s.sample(n=3)\n",
" | 27 -0.994689\n",
" | 55 -1.049016\n",
" | 67 -0.224565\n",
" | dtype: float64\n",
" | \n",
" | And a random 10% of the ``DataFrame`` with replacement:\n",
" | \n",
" | >>> df.sample(frac=0.1, replace=True)\n",
" | A B C D\n",
" | 35 1.981780 0.142106 1.817165 -0.290805\n",
" | 49 -1.336199 -0.448634 -0.789640 0.217116\n",
" | 40 0.823173 -0.078816 1.009536 1.015108\n",
" | 15 1.421154 -0.055301 -1.922594 -0.019696\n",
" | 6 -0.148339 0.832938 1.787600 -1.383767\n",
" | \n",
" | select(self, crit, axis=0)\n",
" | Return data corresponding to axis labels matching criteria\n",
" | \n",
" | DEPRECATED: use df.loc[df.index.map(crit)] to select via labels\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | crit : function\n",
" | To be called on each index (label). Should return True or False\n",
" | axis : int\n",
" | \n",
" | Returns\n",
" | -------\n",
" | selection : type of caller\n",
" | \n",
" | set_axis(self, labels, axis=0, inplace=None)\n",
" | Assign desired index to given axis\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | labels: list-like or Index\n",
" | The values for the new index\n",
" | axis : int or string, default 0\n",
" | inplace : boolean, default None\n",
" | Whether to return a new NDFrame instance.\n",
" | \n",
" | WARNING: inplace=None currently falls back to to True, but\n",
" | in a future version, will default to False. Use inplace=True\n",
" | explicitly rather than relying on the default.\n",
" | \n",
" | .. versionadded:: 0.21.0\n",
" | The signature is make consistent to the rest of the API.\n",
" | Previously, the \"axis\" and \"labels\" arguments were respectively\n",
" | the first and second positional arguments.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | renamed : NDFrame or None\n",
" | An object of same type as caller if inplace=False, None otherwise.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pandas.NDFrame.rename\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> s = pd.Series([1, 2, 3])\n",
" | >>> s\n",
" | 0 1\n",
" | 1 2\n",
" | 2 3\n",
" | dtype: int64\n",
" | >>> s.set_axis(['a', 'b', 'c'], axis=0, inplace=False)\n",
" | a 1\n",
" | b 2\n",
" | c 3\n",
" | dtype: int64\n",
" | >>> df = pd.DataFrame({\"A\": [1, 2, 3], \"B\": [4, 5, 6]})\n",
" | >>> df.set_axis(['a', 'b', 'c'], axis=0, inplace=False)\n",
" | A B\n",
" | a 1 4\n",
" | b 2 5\n",
" | c 3 6\n",
" | >>> df.set_axis(['I', 'II'], axis=1, inplace=False)\n",
" | I II\n",
" | 0 1 4\n",
" | 1 2 5\n",
" | 2 3 6\n",
" | >>> df.set_axis(['i', 'ii'], axis=1, inplace=True)\n",
" | >>> df\n",
" | i ii\n",
" | 0 1 4\n",
" | 1 2 5\n",
" | 2 3 6\n",
" | \n",
" | slice_shift(self, periods=1, axis=0)\n",
" | Equivalent to `shift` without copying data. The shifted data will\n",
" | not include the dropped periods and the shifted axis will be smaller\n",
" | than the original.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | periods : int\n",
" | Number of periods to move, can be positive or negative\n",
" | \n",
" | Notes\n",
" | -----\n",
" | While the `slice_shift` is faster than `shift`, you may pay for it\n",
" | later during alignment.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | shifted : same type as caller\n",
" | \n",
" | squeeze(self, axis=None)\n",
" | Squeeze length 1 dimensions.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | axis : None, integer or string axis name, optional\n",
" | The axis to squeeze if 1-sized.\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | scalar if 1-sized, else original object\n",
" | \n",
" | swapaxes(self, axis1, axis2, copy=True)\n",
" | Interchange axes and swap values axes appropriately\n",
" | \n",
" | Returns\n",
" | -------\n",
" | y : same as input\n",
" | \n",
" | tail(self, n=5)\n",
" | Return the last n rows.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | n : int, default 5\n",
" | Number of rows to select.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | obj_tail : type of caller\n",
" | The last n rows of the caller object.\n",
" | \n",
" | take(self, indices, axis=0, convert=None, is_copy=True, **kwargs)\n",
" | Return the elements in the given *positional* indices along an axis.\n",
" | \n",
" | This means that we are not indexing according to actual values in\n",
" | the index attribute of the object. We are indexing according to the\n",
" | actual position of the element in the object.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | indices : array-like\n",
" | An array of ints indicating which positions to take.\n",
" | axis : int, default 0\n",
" | The axis on which to select elements. \"0\" means that we are\n",
" | selecting rows, \"1\" means that we are selecting columns, etc.\n",
" | convert : bool, default True\n",
" | .. deprecated:: 0.21.0\n",
" | In the future, negative indices will always be converted.\n",
" | \n",
" | Whether to convert negative indices into positive ones.\n",
" | For example, ``-1`` would map to the ``len(axis) - 1``.\n",
" | The conversions are similar to the behavior of indexing a\n",
" | regular Python list.\n",
" | is_copy : bool, default True\n",
" | Whether to return a copy of the original object or not.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame([('falcon', 'bird', 389.0),\n",
" | ('parrot', 'bird', 24.0),\n",
" | ('lion', 'mammal', 80.5),\n",
" | ('monkey', 'mammal', np.nan)],\n",
" | columns=('name', 'class', 'max_speed'),\n",
" | index=[0, 2, 3, 1])\n",
" | >>> df\n",
" | name class max_speed\n",
" | 0 falcon bird 389.0\n",
" | 2 parrot bird 24.0\n",
" | 3 lion mammal 80.5\n",
" | 1 monkey mammal NaN\n",
" | \n",
" | Take elements at positions 0 and 3 along the axis 0 (default).\n",
" | \n",
" | Note how the actual indices selected (0 and 1) do not correspond to\n",
" | our selected indices 0 and 3. That's because we are selecting the 0th\n",
" | and 3rd rows, not rows whose indices equal 0 and 3.\n",
" | \n",
" | >>> df.take([0, 3])\n",
" | 0 falcon bird 389.0\n",
" | 1 monkey mammal NaN\n",
" | \n",
" | Take elements at indices 1 and 2 along the axis 1 (column selection).\n",
" | \n",
" | >>> df.take([1, 2], axis=1)\n",
" | class max_speed\n",
" | 0 bird 389.0\n",
" | 2 bird 24.0\n",
" | 3 mammal 80.5\n",
" | 1 mammal NaN\n",
" | \n",
" | We may take elements using negative integers for positive indices,\n",
" | starting from the end of the object, just like with Python lists.\n",
" | \n",
" | >>> df.take([-1, -2])\n",
" | name class max_speed\n",
" | 1 monkey mammal NaN\n",
" | 3 lion mammal 80.5\n",
" | \n",
" | Returns\n",
" | -------\n",
" | taken : type of caller\n",
" | An array-like containing the elements taken from the object.\n",
" | \n",
" | See Also\n",
" | --------\n",
" | numpy.ndarray.take\n",
" | numpy.take\n",
" | \n",
" | to_clipboard(self, excel=None, sep=None, **kwargs)\n",
" | Attempt to write text representation of object to the system clipboard\n",
" | This can be pasted into Excel, for example.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | excel : boolean, defaults to True\n",
" | if True, use the provided separator, writing in a csv\n",
" | format for allowing easy pasting into excel.\n",
" | if False, write a string representation of the object\n",
" | to the clipboard\n",
" | sep : optional, defaults to tab\n",
" | other keywords are passed to to_csv\n",
" | \n",
" | Notes\n",
" | -----\n",
" | Requirements for your platform\n",
" | - Linux: xclip, or xsel (with gtk or PyQt4 modules)\n",
" | - Windows: none\n",
" | - OS X: none\n",
" | \n",
" | to_dense(self)\n",
" | Return dense representation of NDFrame (as opposed to sparse)\n",
" | \n",
" | to_hdf(self, path_or_buf, key, **kwargs)\n",
" | Write the contained data to an HDF5 file using HDFStore.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | path_or_buf : the path (string) or HDFStore object\n",
" | key : string\n",
" | identifier for the group in the store\n",
" | mode : optional, {'a', 'w', 'r+'}, default 'a'\n",
" | \n",
" | ``'w'``\n",
" | Write; a new file is created (an existing file with the same\n",
" | name would be deleted).\n",
" | ``'a'``\n",
" | Append; an existing file is opened for reading and writing,\n",
" | and if the file does not exist it is created.\n",
" | ``'r+'``\n",
" | It is similar to ``'a'``, but the file must already exist.\n",
" | format : 'fixed(f)|table(t)', default is 'fixed'\n",
" | fixed(f) : Fixed format\n",
" | Fast writing/reading. Not-appendable, nor searchable\n",
" | table(t) : Table format\n",
" | Write as a PyTables Table structure which may perform\n",
" | worse but allow more flexible operations like searching\n",
" | / selecting subsets of the data\n",
" | append : boolean, default False\n",
" | For Table formats, append the input data to the existing\n",
" | data_columns : list of columns, or True, default None\n",
" | List of columns to create as indexed data columns for on-disk\n",
" | queries, or True to use all columns. By default only the axes\n",
" | of the object are indexed. See `here\n",
" | `__.\n",
" | \n",
" | Applicable only to format='table'.\n",
" | complevel : int, 0-9, default None\n",
" | Specifies a compression level for data.\n",
" | A value of 0 disables compression.\n",
" | complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'\n",
" | Specifies the compression library to be used.\n",
" | As of v0.20.2 these additional compressors for Blosc are supported\n",
" | (default if no compressor specified: 'blosc:blosclz'):\n",
" | {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',\n",
" | 'blosc:zlib', 'blosc:zstd'}.\n",
" | Specifying a compression library which is not available issues\n",
" | a ValueError.\n",
" | fletcher32 : bool, default False\n",
" | If applying compression use the fletcher32 checksum\n",
" | dropna : boolean, default False.\n",
" | If true, ALL nan rows will not be written to store.\n",
" | \n",
" | to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False, compression=None)\n",
" | Convert the object to a JSON string.\n",
" | \n",
" | Note NaN's and None will be converted to null and datetime objects\n",
" | will be converted to UNIX timestamps.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | path_or_buf : the path or buffer to write the result string\n",
" | if this is None, return the converted string\n",
" | orient : string\n",
" | \n",
" | * Series\n",
" | \n",
" | - default is 'index'\n",
" | - allowed values are: {'split','records','index'}\n",
" | \n",
" | * DataFrame\n",
" | \n",
" | - default is 'columns'\n",
" | - allowed values are:\n",
" | {'split','records','index','columns','values'}\n",
" | \n",
" | * The format of the JSON string\n",
" | \n",
" | - split : dict like\n",
" | {index -> [index], columns -> [columns], data -> [values]}\n",
" | - records : list like\n",
" | [{column -> value}, ... , {column -> value}]\n",
" | - index : dict like {index -> {column -> value}}\n",
" | - columns : dict like {column -> {index -> value}}\n",
" | - values : just the values array\n",
" | - table : dict like {'schema': {schema}, 'data': {data}}\n",
" | describing the data, and the data component is\n",
" | like ``orient='records'``.\n",
" | \n",
" | .. versionchanged:: 0.20.0\n",
" | \n",
" | date_format : {None, 'epoch', 'iso'}\n",
" | Type of date conversion. `epoch` = epoch milliseconds,\n",
" | `iso` = ISO8601. The default depends on the `orient`. For\n",
" | `orient='table'`, the default is `'iso'`. For all other orients,\n",
" | the default is `'epoch'`.\n",
" | double_precision : The number of decimal places to use when encoding\n",
" | floating point values, default 10.\n",
" | force_ascii : force encoded string to be ASCII, default True.\n",
" | date_unit : string, default 'ms' (milliseconds)\n",
" | The time unit to encode to, governs timestamp and ISO8601\n",
" | precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,\n",
" | microsecond, and nanosecond respectively.\n",
" | default_handler : callable, default None\n",
" | Handler to call if object cannot otherwise be converted to a\n",
" | suitable format for JSON. Should receive a single argument which is\n",
" | the object to convert and return a serialisable object.\n",
" | lines : boolean, default False\n",
" | If 'orient' is 'records' write out line delimited json format. Will\n",
" | throw ValueError if incorrect 'orient' since others are not list\n",
" | like.\n",
" | \n",
" | .. versionadded:: 0.19.0\n",
" | \n",
" | compression : {None, 'gzip', 'bz2', 'xz'}\n",
" | A string representing the compression to use in the output file,\n",
" | only used when the first argument is a filename\n",
" | \n",
" | .. versionadded:: 0.21.0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | same type as input object with filtered info axis\n",
" | \n",
" | See Also\n",
" | --------\n",
" | pd.read_json\n",
" | \n",
" | Examples\n",
" | --------\n",
" | \n",
" | >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],\n",
" | ... index=['row 1', 'row 2'],\n",
" | ... columns=['col 1', 'col 2'])\n",
" | >>> df.to_json(orient='split')\n",
" | '{\"columns\":[\"col 1\",\"col 2\"],\n",
" | \"index\":[\"row 1\",\"row 2\"],\n",
" | \"data\":[[\"a\",\"b\"],[\"c\",\"d\"]]}'\n",
" | \n",
" | Encoding/decoding a Dataframe using ``'index'`` formatted JSON:\n",
" | \n",
" | >>> df.to_json(orient='index')\n",
" | '{\"row 1\":{\"col 1\":\"a\",\"col 2\":\"b\"},\"row 2\":{\"col 1\":\"c\",\"col 2\":\"d\"}}'\n",
" | \n",
" | Encoding/decoding a Dataframe using ``'records'`` formatted JSON.\n",
" | Note that index labels are not preserved with this encoding.\n",
" | \n",
" | >>> df.to_json(orient='records')\n",
" | '[{\"col 1\":\"a\",\"col 2\":\"b\"},{\"col 1\":\"c\",\"col 2\":\"d\"}]'\n",
" | \n",
" | Encoding with Table Schema\n",
" | \n",
" | >>> df.to_json(orient='table')\n",
" | '{\"schema\": {\"fields\": [{\"name\": \"index\", \"type\": \"string\"},\n",
" | {\"name\": \"col 1\", \"type\": \"string\"},\n",
" | {\"name\": \"col 2\", \"type\": \"string\"}],\n",
" | \"primaryKey\": \"index\",\n",
" | \"pandas_version\": \"0.20.0\"},\n",
" | \"data\": [{\"index\": \"row 1\", \"col 1\": \"a\", \"col 2\": \"b\"},\n",
" | {\"index\": \"row 2\", \"col 1\": \"c\", \"col 2\": \"d\"}]}'\n",
" | \n",
" | to_latex(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, bold_rows=False, column_format=None, longtable=None, escape=None, encoding=None, decimal='.', multicolumn=None, multicolumn_format=None, multirow=None)\n",
" | Render an object to a tabular environment table. You can splice\n",
" | this into a LaTeX document. Requires \\\\usepackage{booktabs}.\n",
" | \n",
" | .. versionchanged:: 0.20.2\n",
" | Added to Series\n",
" | \n",
" | `to_latex`-specific options:\n",
" | \n",
" | bold_rows : boolean, default False\n",
" | Make the row labels bold in the output\n",
" | column_format : str, default None\n",
" | The columns format as specified in `LaTeX table format\n",
" | `__ e.g 'rcl' for 3\n",
" | columns\n",
" | longtable : boolean, default will be read from the pandas config module\n",
" | Default: False.\n",
" | Use a longtable environment instead of tabular. Requires adding\n",
" | a \\\\usepackage{longtable} to your LaTeX preamble.\n",
" | escape : boolean, default will be read from the pandas config module\n",
" | Default: True.\n",
" | When set to False prevents from escaping latex special\n",
" | characters in column names.\n",
" | encoding : str, default None\n",
" | A string representing the encoding to use in the output file,\n",
" | defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.\n",
" | decimal : string, default '.'\n",
" | Character recognized as decimal separator, e.g. ',' in Europe.\n",
" | \n",
" | .. versionadded:: 0.18.0\n",
" | \n",
" | multicolumn : boolean, default True\n",
" | Use \\multicolumn to enhance MultiIndex columns.\n",
" | The default will be read from the config module.\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | multicolumn_format : str, default 'l'\n",
" | The alignment for multicolumns, similar to `column_format`\n",
" | The default will be read from the config module.\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | multirow : boolean, default False\n",
" | Use \\multirow to enhance MultiIndex rows.\n",
" | Requires adding a \\\\usepackage{multirow} to your LaTeX preamble.\n",
" | Will print centered labels (instead of top-aligned)\n",
" | across the contained rows, separating groups via clines.\n",
" | The default will be read from the pandas config module.\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | \n",
" | to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs)\n",
" | msgpack (serialize) object to input file path\n",
" | \n",
" | THIS IS AN EXPERIMENTAL LIBRARY and the storage format\n",
" | may not be stable until a future release.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | path : string File path, buffer-like, or None\n",
" | if None, return generated string\n",
" | append : boolean whether to append to an existing msgpack\n",
" | (default is False)\n",
" | compress : type of compressor (zlib or blosc), default to None (no\n",
" | compression)\n",
" | \n",
" | to_pickle(self, path, compression='infer', protocol=4)\n",
" | Pickle (serialize) object to input file path.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | path : string\n",
" | File path\n",
" | compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'\n",
" | a string representing the compression to use in the output file\n",
" | \n",
" | .. versionadded:: 0.20.0\n",
" | protocol : int\n",
" | Int which indicates which protocol should be used by the pickler,\n",
" | default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible\n",
" | values for this parameter depend on the version of Python. For\n",
" | Python 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a\n",
" | valid value. For Python >= 3.4, 4 is a valid value.A negative value\n",
" | for the protocol parameter is equivalent to setting its value to\n",
" | HIGHEST_PROTOCOL.\n",
" | \n",
" | .. [1] https://docs.python.org/3/library/pickle.html\n",
" | .. versionadded:: 0.21.0\n",
" | \n",
" | to_sql(self, name, con, flavor=None, schema=None, if_exists='fail', index=True, index_label=None, chunksize=None, dtype=None)\n",
" | Write records stored in a DataFrame to a SQL database.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | name : string\n",
" | Name of SQL table\n",
" | con : SQLAlchemy engine or DBAPI2 connection (legacy mode)\n",
" | Using SQLAlchemy makes it possible to use any DB supported by that\n",
" | library. If a DBAPI2 object, only sqlite3 is supported.\n",
" | flavor : 'sqlite', default None\n",
" | .. deprecated:: 0.19.0\n",
" | 'sqlite' is the only supported option if SQLAlchemy is not\n",
" | used.\n",
" | schema : string, default None\n",
" | Specify the schema (if database flavor supports this). If None, use\n",
" | default schema.\n",
" | if_exists : {'fail', 'replace', 'append'}, default 'fail'\n",
" | - fail: If table exists, do nothing.\n",
" | - replace: If table exists, drop it, recreate it, and insert data.\n",
" | - append: If table exists, insert data. Create if does not exist.\n",
" | index : boolean, default True\n",
" | Write DataFrame index as a column.\n",
" | index_label : string or sequence, default None\n",
" | Column label for index column(s). If None is given (default) and\n",
" | `index` is True, then the index names are used.\n",
" | A sequence should be given if the DataFrame uses MultiIndex.\n",
" | chunksize : int, default None\n",
" | If not None, then rows will be written in batches of this size at a\n",
" | time. If None, all rows will be written at once.\n",
" | dtype : dict of column name to SQL type, default None\n",
" | Optional specifying the datatype for columns. The SQL type should\n",
" | be a SQLAlchemy type, or a string for sqlite3 fallback connection.\n",
" | \n",
" | to_xarray(self)\n",
" | Return an xarray object from the pandas object.\n",
" | \n",
" | Returns\n",
" | -------\n",
" | a DataArray for a Series\n",
" | a Dataset for a DataFrame\n",
" | a DataArray for higher dims\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame({'A' : [1, 1, 2],\n",
" | 'B' : ['foo', 'bar', 'foo'],\n",
" | 'C' : np.arange(4.,7)})\n",
" | >>> df\n",
" | A B C\n",
" | 0 1 foo 4.0\n",
" | 1 1 bar 5.0\n",
" | 2 2 foo 6.0\n",
" | \n",
" | >>> df.to_xarray()\n",
" | \n",
" | Dimensions: (index: 3)\n",
" | Coordinates:\n",
" | * index (index) int64 0 1 2\n",
" | Data variables:\n",
" | A (index) int64 1 1 2\n",
" | B (index) object 'foo' 'bar' 'foo'\n",
" | C (index) float64 4.0 5.0 6.0\n",
" | \n",
" | >>> df = pd.DataFrame({'A' : [1, 1, 2],\n",
" | 'B' : ['foo', 'bar', 'foo'],\n",
" | 'C' : np.arange(4.,7)}\n",
" | ).set_index(['B','A'])\n",
" | >>> df\n",
" | C\n",
" | B A\n",
" | foo 1 4.0\n",
" | bar 1 5.0\n",
" | foo 2 6.0\n",
" | \n",
" | >>> df.to_xarray()\n",
" | \n",
" | Dimensions: (A: 2, B: 2)\n",
" | Coordinates:\n",
" | * B (B) object 'bar' 'foo'\n",
" | * A (A) int64 1 2\n",
" | Data variables:\n",
" | C (B, A) float64 5.0 nan 4.0 6.0\n",
" | \n",
" | >>> p = pd.Panel(np.arange(24).reshape(4,3,2),\n",
" | items=list('ABCD'),\n",
" | major_axis=pd.date_range('20130101', periods=3),\n",
" | minor_axis=['first', 'second'])\n",
" | >>> p\n",
" | \n",
" | Dimensions: 4 (items) x 3 (major_axis) x 2 (minor_axis)\n",
" | Items axis: A to D\n",
" | Major_axis axis: 2013-01-01 00:00:00 to 2013-01-03 00:00:00\n",
" | Minor_axis axis: first to second\n",
" | \n",
" | >>> p.to_xarray()\n",
" | \n",
" | array([[[ 0, 1],\n",
" | [ 2, 3],\n",
" | [ 4, 5]],\n",
" | [[ 6, 7],\n",
" | [ 8, 9],\n",
" | [10, 11]],\n",
" | [[12, 13],\n",
" | [14, 15],\n",
" | [16, 17]],\n",
" | [[18, 19],\n",
" | [20, 21],\n",
" | [22, 23]]])\n",
" | Coordinates:\n",
" | * items (items) object 'A' 'B' 'C' 'D'\n",
" | * major_axis (major_axis) datetime64[ns] 2013-01-01 2013-01-02 2013-01-03 # noqa\n",
" | * minor_axis (minor_axis) object 'first' 'second'\n",
" | \n",
" | Notes\n",
" | -----\n",
" | See the `xarray docs `__\n",
" | \n",
" | truncate(self, before=None, after=None, axis=None, copy=True)\n",
" | Truncates a sorted DataFrame/Series before and/or after some\n",
" | particular index value. If the axis contains only datetime values,\n",
" | before/after parameters are converted to datetime values.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | before : date, string, int\n",
" | Truncate all rows before this index value\n",
" | after : date, string, int\n",
" | Truncate all rows after this index value\n",
" | axis : {0 or 'index', 1 or 'columns'}\n",
" | \n",
" | * 0 or 'index': apply truncation to rows\n",
" | * 1 or 'columns': apply truncation to columns\n",
" | Default is stat axis for given data type (0 for Series and\n",
" | DataFrames, 1 for Panels)\n",
" | copy : boolean, default is True,\n",
" | return a copy of the truncated section\n",
" | \n",
" | Returns\n",
" | -------\n",
" | truncated : type of caller\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],\n",
" | ... 'B': ['f', 'g', 'h', 'i', 'j'],\n",
" | ... 'C': ['k', 'l', 'm', 'n', 'o']},\n",
" | ... index=[1, 2, 3, 4, 5])\n",
" | >>> df.truncate(before=2, after=4)\n",
" | A B C\n",
" | 2 b g l\n",
" | 3 c h m\n",
" | 4 d i n\n",
" | >>> df = pd.DataFrame({'A': [1, 2, 3, 4, 5],\n",
" | ... 'B': [6, 7, 8, 9, 10],\n",
" | ... 'C': [11, 12, 13, 14, 15]},\n",
" | ... index=['a', 'b', 'c', 'd', 'e'])\n",
" | >>> df.truncate(before='b', after='d')\n",
" | A B C\n",
" | b 2 7 12\n",
" | c 3 8 13\n",
" | d 4 9 14\n",
" | \n",
" | The index values in ``truncate`` can be datetimes or string\n",
" | dates. Note that ``truncate`` assumes a 0 value for any unspecified\n",
" | date component in a ``DatetimeIndex`` in contrast to slicing which\n",
" | returns any partially matching dates.\n",
" | \n",
" | >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s')\n",
" | >>> df = pd.DataFrame(index=dates, data={'A': 1})\n",
" | >>> df.truncate('2016-01-05', '2016-01-10').tail()\n",
" | A\n",
" | 2016-01-09 23:59:56 1\n",
" | 2016-01-09 23:59:57 1\n",
" | 2016-01-09 23:59:58 1\n",
" | 2016-01-09 23:59:59 1\n",
" | 2016-01-10 00:00:00 1\n",
" | >>> df.loc['2016-01-05':'2016-01-10', :].tail()\n",
" | A\n",
" | 2016-01-10 23:59:55 1\n",
" | 2016-01-10 23:59:56 1\n",
" | 2016-01-10 23:59:57 1\n",
" | 2016-01-10 23:59:58 1\n",
" | 2016-01-10 23:59:59 1\n",
" | \n",
" | tshift(self, periods=1, freq=None, axis=0)\n",
" | Shift the time index, using the index's frequency if available.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | periods : int\n",
" | Number of periods to move, can be positive or negative\n",
" | freq : DateOffset, timedelta, or time rule string, default None\n",
" | Increment to use from the tseries module or time rule (e.g. 'EOM')\n",
" | axis : int or basestring\n",
" | Corresponds to the axis that contains the Index\n",
" | \n",
" | Notes\n",
" | -----\n",
" | If freq is not specified then tries to use the freq or inferred_freq\n",
" | attributes of the index. If neither of those attributes exist, a\n",
" | ValueError is thrown\n",
" | \n",
" | Returns\n",
" | -------\n",
" | shifted : NDFrame\n",
" | \n",
" | tz_convert(self, tz, axis=0, level=None, copy=True)\n",
" | Convert tz-aware axis to target time zone.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | tz : string or pytz.timezone object\n",
" | axis : the axis to convert\n",
" | level : int, str, default None\n",
" | If axis ia a MultiIndex, convert a specific level. Otherwise\n",
" | must be None\n",
" | copy : boolean, default True\n",
" | Also make a copy of the underlying data\n",
" | \n",
" | Returns\n",
" | -------\n",
" | \n",
" | Raises\n",
" | ------\n",
" | TypeError\n",
" | If the axis is tz-naive.\n",
" | \n",
" | tz_localize(self, tz, axis=0, level=None, copy=True, ambiguous='raise')\n",
" | Localize tz-naive TimeSeries to target time zone.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | tz : string or pytz.timezone object\n",
" | axis : the axis to localize\n",
" | level : int, str, default None\n",
" | If axis ia a MultiIndex, localize a specific level. Otherwise\n",
" | must be None\n",
" | copy : boolean, default True\n",
" | Also make a copy of the underlying data\n",
" | ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'\n",
" | - 'infer' will attempt to infer fall dst-transition hours based on\n",
" | order\n",
" | - bool-ndarray where True signifies a DST time, False designates\n",
" | a non-DST time (note that this flag is only applicable for\n",
" | ambiguous times)\n",
" | - 'NaT' will return NaT where there are ambiguous times\n",
" | - 'raise' will raise an AmbiguousTimeError if there are ambiguous\n",
" | times\n",
" | infer_dst : boolean, default False\n",
" | .. deprecated:: 0.15.0\n",
" | Attempt to infer fall dst-transition hours based on order\n",
" | \n",
" | Returns\n",
" | -------\n",
" | \n",
" | Raises\n",
" | ------\n",
" | TypeError\n",
" | If the TimeSeries is tz-aware and tz is not None.\n",
" | \n",
" | where(self, cond, other=nan, inplace=False, axis=None, level=None, errors='raise', try_cast=False, raise_on_error=None)\n",
" | Return an object of same shape as self and whose corresponding\n",
" | entries are from self where `cond` is True and otherwise are from\n",
" | `other`.\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | cond : boolean NDFrame, array-like, or callable\n",
" | Where `cond` is True, keep the original value. Where\n",
" | False, replace with corresponding value from `other`.\n",
" | If `cond` is callable, it is computed on the NDFrame and\n",
" | should return boolean NDFrame or array. The callable must\n",
" | not change input NDFrame (though pandas doesn't check it).\n",
" | \n",
" | .. versionadded:: 0.18.1\n",
" | A callable can be used as cond.\n",
" | \n",
" | other : scalar, NDFrame, or callable\n",
" | Entries where `cond` is False are replaced with\n",
" | corresponding value from `other`.\n",
" | If other is callable, it is computed on the NDFrame and\n",
" | should return scalar or NDFrame. The callable must not\n",
" | change input NDFrame (though pandas doesn't check it).\n",
" | \n",
" | .. versionadded:: 0.18.1\n",
" | A callable can be used as other.\n",
" | \n",
" | inplace : boolean, default False\n",
" | Whether to perform the operation in place on the data\n",
" | axis : alignment axis if needed, default None\n",
" | level : alignment level if needed, default None\n",
" | errors : str, {'raise', 'ignore'}, default 'raise'\n",
" | - ``raise`` : allow exceptions to be raised\n",
" | - ``ignore`` : suppress exceptions. On error return original object\n",
" | \n",
" | Note that currently this parameter won't affect\n",
" | the results and will always coerce to a suitable dtype.\n",
" | \n",
" | try_cast : boolean, default False\n",
" | try to cast the result back to the input type (if possible),\n",
" | raise_on_error : boolean, default True\n",
" | Whether to raise on invalid data types (e.g. trying to where on\n",
" | strings)\n",
" | \n",
" | .. deprecated:: 0.21.0\n",
" | \n",
" | Returns\n",
" | -------\n",
" | wh : same type as caller\n",
" | \n",
" | Notes\n",
" | -----\n",
" | The where method is an application of the if-then idiom. For each\n",
" | element in the calling DataFrame, if ``cond`` is ``True`` the\n",
" | element is used; otherwise the corresponding element from the DataFrame\n",
" | ``other`` is used.\n",
" | \n",
" | The signature for :func:`DataFrame.where` differs from\n",
" | :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to\n",
" | ``np.where(m, df1, df2)``.\n",
" | \n",
" | For further details and examples see the ``where`` documentation in\n",
" | :ref:`indexing `.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> s = pd.Series(range(5))\n",
" | >>> s.where(s > 0)\n",
" | 0 NaN\n",
" | 1 1.0\n",
" | 2 2.0\n",
" | 3 3.0\n",
" | 4 4.0\n",
" | \n",
" | >>> s.mask(s > 0)\n",
" | 0 0.0\n",
" | 1 NaN\n",
" | 2 NaN\n",
" | 3 NaN\n",
" | 4 NaN\n",
" | \n",
" | >>> s.where(s > 1, 10)\n",
" | 0 10.0\n",
" | 1 10.0\n",
" | 2 2.0\n",
" | 3 3.0\n",
" | 4 4.0\n",
" | \n",
" | >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])\n",
" | >>> m = df % 3 == 0\n",
" | >>> df.where(m, -df)\n",
" | A B\n",
" | 0 0 -1\n",
" | 1 -2 3\n",
" | 2 -4 -5\n",
" | 3 6 -7\n",
" | 4 -8 9\n",
" | >>> df.where(m, -df) == np.where(m, df, -df)\n",
" | A B\n",
" | 0 True True\n",
" | 1 True True\n",
" | 2 True True\n",
" | 3 True True\n",
" | 4 True True\n",
" | >>> df.where(m, -df) == df.mask(~m, -df)\n",
" | A B\n",
" | 0 True True\n",
" | 1 True True\n",
" | 2 True True\n",
" | 3 True True\n",
" | 4 True True\n",
" | \n",
" | See Also\n",
" | --------\n",
" | :func:`DataFrame.mask`\n",
" | \n",
" | xs(self, key, axis=0, level=None, drop_level=True)\n",
" | Returns a cross-section (row(s) or column(s)) from the\n",
" | Series/DataFrame. Defaults to cross-section on the rows (axis=0).\n",
" | \n",
" | Parameters\n",
" | ----------\n",
" | key : object\n",
" | Some label contained in the index, or partially in a MultiIndex\n",
" | axis : int, default 0\n",
" | Axis to retrieve cross-section on\n",
" | level : object, defaults to first n levels (n=1 or len(key))\n",
" | In case of a key partially contained in a MultiIndex, indicate\n",
" | which levels are used. Levels can be referred by label or position.\n",
" | drop_level : boolean, default True\n",
" | If False, returns object with same levels as self.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | >>> df\n",
" | A B C\n",
" | a 4 5 2\n",
" | b 4 0 9\n",
" | c 9 7 3\n",
" | >>> df.xs('a')\n",
" | A 4\n",
" | B 5\n",
" | C 2\n",
" | Name: a\n",
" | >>> df.xs('C', axis=1)\n",
" | a 2\n",
" | b 9\n",
" | c 3\n",
" | Name: C\n",
" | \n",
" | >>> df\n",
" | A B C D\n",
" | first second third\n",
" | bar one 1 4 1 8 9\n",
" | two 1 7 5 5 0\n",
" | baz one 1 6 6 8 0\n",
" | three 2 5 3 5 3\n",
" | >>> df.xs(('baz', 'three'))\n",
" | A B C D\n",
" | third\n",
" | 2 5 3 5 3\n",
" | >>> df.xs('one', level=1)\n",
" | A B C D\n",
" | first third\n",
" | bar 1 4 1 8 9\n",
" | baz 1 6 6 8 0\n",
" | >>> df.xs(('baz', 2), level=[0, 'third'])\n",
" | A B C D\n",
" | second\n",
" | three 5 3 5 3\n",
" | \n",
" | Returns\n",
" | -------\n",
" | xs : Series or DataFrame\n",
" | \n",
" | Notes\n",
" | -----\n",
" | xs is only for getting, not setting values.\n",
" | \n",
" | MultiIndex Slicers is a generic way to get/set values on any level or\n",
" | levels. It is a superset of xs functionality, see\n",
" | :ref:`MultiIndex Slicers `\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data descriptors inherited from pandas.core.generic.NDFrame:\n",
" | \n",
" | at\n",
" | Fast label-based scalar accessor\n",
" | \n",
" | Similarly to ``loc``, ``at`` provides **label** based scalar lookups.\n",
" | You can also set using these indexers.\n",
" | \n",
" | blocks\n",
" | Internal property, property synonym for as_blocks()\n",
" | \n",
" | .. deprecated:: 0.21.0\n",
" | \n",
" | dtypes\n",
" | Return the dtypes in this object.\n",
" | \n",
" | empty\n",
" | True if NDFrame is entirely empty [no items], meaning any of the\n",
" | axes are of length 0.\n",
" | \n",
" | Notes\n",
" | -----\n",
" | If NDFrame contains only NaNs, it is still not considered empty. See\n",
" | the example below.\n",
" | \n",
" | Examples\n",
" | --------\n",
" | An example of an actual empty DataFrame. Notice the index is empty:\n",
" | \n",
" | >>> df_empty = pd.DataFrame({'A' : []})\n",
" | >>> df_empty\n",
" | Empty DataFrame\n",
" | Columns: [A]\n",
" | Index: []\n",
" | >>> df_empty.empty\n",
" | True\n",
" | \n",
" | If we only have NaNs in our DataFrame, it is not considered empty! We\n",
" | will need to drop the NaNs to make the DataFrame empty:\n",
" | \n",
" | >>> df = pd.DataFrame({'A' : [np.nan]})\n",
" | >>> df\n",
" | A\n",
" | 0 NaN\n",
" | >>> df.empty\n",
" | False\n",
" | >>> df.dropna().empty\n",
" | True\n",
" | \n",
" | See also\n",
" | --------\n",
" | pandas.Series.dropna\n",
" | pandas.DataFrame.dropna\n",
" | \n",
" | ftypes\n",
" | Return the ftypes (indication of sparse/dense and dtype)\n",
" | in this object.\n",
" | \n",
" | iat\n",
" | Fast integer location scalar accessor.\n",
" | \n",
" | Similarly to ``iloc``, ``iat`` provides **integer** based lookups.\n",
" | You can also set using these indexers.\n",
" | \n",
" | iloc\n",
" | Purely integer-location based indexing for selection by position.\n",
" | \n",
" | ``.iloc[]`` is primarily integer position based (from ``0`` to\n",
" | ``length-1`` of the axis), but may also be used with a boolean\n",
" | array.\n",
" | \n",
" | Allowed inputs are:\n",
" | \n",
" | - An integer, e.g. ``5``.\n",
" | - A list or array of integers, e.g. ``[4, 3, 0]``.\n",
" | - A slice object with ints, e.g. ``1:7``.\n",
" | - A boolean array.\n",
" | - A ``callable`` function with one argument (the calling Series, DataFrame\n",
" | or Panel) and that returns valid output for indexing (one of the above)\n",
" | \n",
" | ``.iloc`` will raise ``IndexError`` if a requested indexer is\n",
" | out-of-bounds, except *slice* indexers which allow out-of-bounds\n",
" | indexing (this conforms with python/numpy *slice* semantics).\n",
" | \n",
" | See more at :ref:`Selection by Position `\n",
" | \n",
" | ix\n",
" | A primarily label-location based indexer, with integer position\n",
" | fallback.\n",
" | \n",
" | ``.ix[]`` supports mixed integer and label based access. It is\n",
" | primarily label based, but will fall back to integer positional\n",
" | access unless the corresponding axis is of integer type.\n",
" | \n",
" | ``.ix`` is the most general indexer and will support any of the\n",
" | inputs in ``.loc`` and ``.iloc``. ``.ix`` also supports floating\n",
" | point label schemes. ``.ix`` is exceptionally useful when dealing\n",
" | with mixed positional and label based hierachical indexes.\n",
" | \n",
" | However, when an axis is integer based, ONLY label based access\n",
" | and not positional access is supported. Thus, in such cases, it's\n",
" | usually better to be explicit and use ``.iloc`` or ``.loc``.\n",
" | \n",
" | See more at :ref:`Advanced Indexing `.\n",
" | \n",
" | loc\n",
" | Purely label-location based indexer for selection by label.\n",
" | \n",
" | ``.loc[]`` is primarily label based, but may also be used with a\n",
" | boolean array.\n",
" | \n",
" | Allowed inputs are:\n",
" | \n",
" | - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is\n",
" | interpreted as a *label* of the index, and **never** as an\n",
" | integer position along the index).\n",
" | - A list or array of labels, e.g. ``['a', 'b', 'c']``.\n",
" | - A slice object with labels, e.g. ``'a':'f'`` (note that contrary\n",
" | to usual python slices, **both** the start and the stop are included!).\n",
" | - A boolean array.\n",
" | - A ``callable`` function with one argument (the calling Series, DataFrame\n",
" | or Panel) and that returns valid output for indexing (one of the above)\n",
" | \n",
" | ``.loc`` will raise a ``KeyError`` when the items are not found.\n",
" | \n",
" | See more at :ref:`Selection by Label `\n",
" | \n",
" | ndim\n",
" | Number of axes / array dimensions\n",
" | \n",
" | size\n",
" | number of elements in the NDFrame\n",
" | \n",
" | values\n",
" | Numpy representation of NDFrame\n",
" | \n",
" | Notes\n",
" | -----\n",
" | The dtype will be a lower-common-denominator dtype (implicit\n",
" | upcasting); that is to say if the dtypes (even of numeric types)\n",
" | are mixed, the one that accommodates all will be chosen. Use this\n",
" | with care if you are not dealing with the blocks.\n",
" | \n",
" | e.g. If the dtypes are float16 and float32, dtype will be upcast to\n",
" | float32. If dtypes are int32 and uint8, dtype will be upcast to\n",
" | int32. By numpy.find_common_type convention, mixing int64 and uint64\n",
" | will result in a flot64 dtype.\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data and other attributes inherited from pandas.core.generic.NDFrame:\n",
" | \n",
" | is_copy = None\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Methods inherited from pandas.core.base.PandasObject:\n",
" | \n",
" | __sizeof__(self)\n",
" | Generates the total memory usage for a object that returns\n",
" | either a value or Series of values\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Methods inherited from pandas.core.base.StringMixin:\n",
" | \n",
" | __bytes__(self)\n",
" | Return a string representation for a particular object.\n",
" | \n",
" | Invoked by bytes(obj) in py3 only.\n",
" | Yields a bytestring in both py2/py3.\n",
" | \n",
" | __repr__(self)\n",
" | Return a string representation for a particular object.\n",
" | \n",
" | Yields Bytestring in Py2, Unicode String in py3.\n",
" | \n",
" | __str__(self)\n",
" | Return a string representation for a particular Object\n",
" | \n",
" | Invoked by str(df) in both py2/py3.\n",
" | Yields Bytestring in Py2, Unicode String in py3.\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data descriptors inherited from pandas.core.base.StringMixin:\n",
" | \n",
" | __dict__\n",
" | dictionary for instance variables (if defined)\n",
" | \n",
" | __weakref__\n",
" | list of weak references to the object (if defined)\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Methods inherited from pandas.core.accessor.DirNamesMixin:\n",
" | \n",
" | __dir__(self)\n",
" | Provide method name lookup and completion\n",
" | Only provide 'public' methods\n",
"\n"
]
}
],
"source": [
"help(iris)"
]
},
{
"cell_type": "code",
"execution_count": 184,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.704794Z",
"start_time": "2019-04-29T13:16:53.699549Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Junior 6\n",
"Senior 5\n",
"Name: status, dtype: int64"
]
},
"execution_count": 184,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#计算每列不同元素的数量\n",
"coun=iris['status'].value_counts()\n",
"coun"
]
},
{
"cell_type": "code",
"execution_count": 185,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.716687Z",
"start_time": "2019-04-29T13:16:53.706388Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Junior 0.545455\n",
"Senior 0.454545\n",
"Name: status, dtype: float64"
]
},
"execution_count": 185,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#计算概率\n",
"rat=coun/sum(coun)\n",
"rat"
]
},
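{
"cell_type": "markdown",
"metadata": {},
"source": [
"The ratio above is the class prior. As a worked check against the output: with 6 Junior and 5 Senior rows out of 11,\n",
"\n",
"$$P(C_i)=\\frac{|D_{C_i}|}{|D|},\\qquad P(\\mathrm{Junior})=\\frac{6}{11}\\approx 0.545,\\qquad P(\\mathrm{Senior})=\\frac{5}{11}\\approx 0.455$$"
]
},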
{
"cell_type": "code",
"execution_count": 186,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.728782Z",
"start_time": "2019-04-29T13:16:53.718418Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['department', 'status', 'age', 'salary'], dtype='object')"
]
},
"execution_count": 186,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#存储列索引\n",
"spec=iris.columns\n",
"spec"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 将以上两者合成形成p(x) px函数"
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T11:01:39.448922Z",
"start_time": "2019-04-29T11:01:39.439837Z"
}
},
"source": [
"#### 计算p(column1|column2)\n"
]
},
{
"cell_type": "code",
"execution_count": 187,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.754313Z",
"start_time": "2019-04-29T13:16:53.731136Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" department status age salary\n",
"1 Sales Junior 2630 26K30K\n",
"2 Sales Junior 3135 31K35K\n",
"3 Systems Junior 2125 46K50K\n",
"5 Systems Junior 2630 46K50K\n",
"8 Marketing Junior 3135 41K45K\n",
"10 Secretary Junior 2630 26K30K\n",
"Junior 1.0\n",
"Name: status, dtype: float64 @@@@@@@ status\n",
" department status age salary\n",
"0 Sales Senior 3135 46K50K\n",
"4 Systems Senior 3135 66K70K\n",
"6 Systems Senior 4145 66K70K\n",
"7 Marketing Senior 3640 46K50K\n",
"9 Secretary Senior 4650 36K40K\n",
"Senior 1.0\n",
"Name: status, dtype: float64 @@@@@@@ status\n"
]
}
],
"source": [
"D='status'\n",
"daframe=iris\n",
"for i in range(len(iris[D].value_counts())):\n",
" #为dataframe中D的每一个取值拆分出叫做temp的,包含该取值的dataframe\n",
" temp=daframe[daframe[D]==coun.index[i]]\n",
" print(temp)\n",
" print(ent(temp,D),'@@@@@@@',D)\n",
" #print(ent(temp,D))\n",
" #print(gain)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 这样我们得到了基于status分块的矩阵,接下来是分别调用px函数完成p(x|c)"
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.765040Z",
"start_time": "2019-04-29T13:16:53.756167Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on built-in function print in module builtins:\n",
"\n",
"print(...)\n",
" print(value, ..., sep=' ', end='\\n', file=sys.stdout, flush=False)\n",
" \n",
" Prints the values to a stream, or to sys.stdout by default.\n",
" Optional keyword arguments:\n",
" file: a file-like object (stream); defaults to the current sys.stdout.\n",
" sep: string inserted between values, default a space.\n",
" end: string appended after the last value, default a newline.\n",
" flush: whether to forcibly flush the stream.\n",
"\n"
]
}
],
"source": [
"help(print)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 自闭调试"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.785360Z",
"start_time": "2019-04-29T13:16:53.767344Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Junior Senior\n",
"Systems 0.0 0.0\n",
"Sales 0.0 0.0\n",
"Marketing 0.0 0.0\n",
"Secretary 0.0 0.0\n",
"Junior 0.0 0.0\n",
"Senior 0.0 0.0\n",
"3135 0.0 0.0\n",
"2630 0.0 0.0\n",
"2125 0.0 0.0\n",
"4650 0.0 0.0\n",
"3640 0.0 0.0\n",
"4145 0.0 0.0\n",
"46K50K 0.0 0.0\n",
"66K70K 0.0 0.0\n",
"26K30K 0.0 0.0\n",
"31K35K 0.0 0.0\n",
"36K40K 0.0 0.0\n",
"41K45K 0.0 0.0\n",
"Index([ 'Systems', 'Sales', 'Marketing', 'Secretary', 'Junior',\n",
" 'Senior', 3135, 2630, 2125, 4650,\n",
" 3640, 4145, '46K50K', '66K70K', '26K30K',\n",
" '31K35K', '36K40K', '41K45K'],\n",
" dtype='object')\n"
]
}
],
"source": [
"D='status'\n",
"daframe=iris\n",
"indextp=[]\n",
"for j in spec:\n",
" for i in daframe[j].value_counts().index:\n",
" indextp.append(i)\n",
"frame = pd.DataFrame(np.zeros([len(indextp), len(iris[D].value_counts())]), \n",
" columns=daframe[D].value_counts().index, \n",
" index=indextp)\n",
"print(frame)\n",
"print(frame.index)"
]
},
{
"cell_type": "code",
"execution_count": 195,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T14:10:33.013469Z",
"start_time": "2019-04-29T14:10:33.008483Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0\n"
]
},
{
"data": {
"text/plain": [
"('salary', 'Junior')"
]
},
"execution_count": 195,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(frame.loc['Senior','Junior'])\n",
"j,iris[D].value_counts().index[0]"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T14:10:39.809907Z",
"start_time": "2019-04-29T14:10:39.805754Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['46K50K', '66K70K', '26K30K', '31K35K', '36K40K', '41K45K'], dtype='object')"
]
},
"execution_count": 196,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris['salary'].value_counts().index"
]
},
{
"cell_type": "code",
"execution_count": 203,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T14:16:59.549545Z",
"start_time": "2019-04-29T14:16:59.347219Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Junior\n",
"Junior\n",
"Junior\n",
"Junior\n",
"Junior\n",
"Junior\n",
"Junior\n",
"error happened\n",
" 2630 0.500000\n",
"3135 0.333333\n",
"2125 0.166667\n",
"Name: age, dtype: float64 \n",
" 4650\n",
"Junior\n",
"error happened\n",
" 2630 0.500000\n",
"3135 0.333333\n",
"2125 0.166667\n",
"Name: age, dtype: float64 \n",
" 3640\n",
"Junior\n",
"error happened\n",
" 2630 0.500000\n",
"3135 0.333333\n",
"2125 0.166667\n",
"Name: age, dtype: float64 \n",
" 4145\n",
"Junior\n",
"Junior\n",
"error happened\n",
" 46K50K 0.333333\n",
"26K30K 0.333333\n",
"31K35K 0.166667\n",
"41K45K 0.166667\n",
"Name: salary, dtype: float64 \n",
" 66K70K\n",
"Junior\n",
"Junior\n",
"Junior\n",
"error happened\n",
" 46K50K 0.333333\n",
"26K30K 0.333333\n",
"31K35K 0.166667\n",
"41K45K 0.166667\n",
"Name: salary, dtype: float64 \n",
" 36K40K\n",
"Junior\n",
"Junior\n",
"Senior\n",
"Senior\n",
"Senior\n",
"Senior\n",
"Senior\n",
"error happened\n",
" 3135 0.4\n",
"4650 0.2\n",
"4145 0.2\n",
"3640 0.2\n",
"Name: age, dtype: float64 \n",
" 2630\n",
"Senior\n",
"error happened\n",
" 3135 0.4\n",
"4650 0.2\n",
"4145 0.2\n",
"3640 0.2\n",
"Name: age, dtype: float64 \n",
" 2125\n",
"Senior\n",
"Senior\n",
"Senior\n",
"Senior\n",
"Senior\n",
"Senior\n",
"error happened\n",
" 46K50K 0.4\n",
"66K70K 0.4\n",
"36K40K 0.2\n",
"Name: salary, dtype: float64 \n",
" 26K30K\n",
"Senior\n",
"error happened\n",
" 46K50K 0.4\n",
"66K70K 0.4\n",
"36K40K 0.2\n",
"Name: salary, dtype: float64 \n",
" 31K35K\n",
"Senior\n",
"Senior\n",
"error happened\n",
" 46K50K 0.4\n",
"66K70K 0.4\n",
"36K40K 0.2\n",
"Name: salary, dtype: float64 \n",
" 41K45K\n",
"Senior\n",
" Junior Senior\n",
"Systems 0.333333 0.4\n",
"Sales 0.333333 0.2\n",
"Marketing 0.166667 0.2\n",
"Secretary 0.166667 0.2\n",
"Junior 0.000000 0.0\n",
"Senior 0.000000 0.0\n",
"3135 0.333333 0.4\n",
"2630 0.500000 0.0\n",
"2125 0.166667 0.0\n",
"4650 0.000000 0.2\n",
"3640 0.000000 0.2\n",
"4145 0.000000 0.2\n",
"46K50K 0.333333 0.4\n",
"66K70K 0.000000 0.4\n",
"26K30K 0.333333 0.0\n",
"31K35K 0.166667 0.0\n",
"36K40K 0.000000 0.2\n",
"41K45K 0.166667 0.0\n"
]
}
],
"source": [
"D='status'\n",
"daframe=iris\n",
"indextp=[]\n",
"for j in spec:\n",
" for i in daframe[j].value_counts().index:\n",
" indextp.append(i)\n",
"frame = pd.DataFrame(np.zeros([len(indextp), len(iris[D].value_counts())]), \n",
" columns=daframe[D].value_counts().index, \n",
" index=indextp)\n",
"\n",
"\n",
"\n",
"for i in range(len(iris[D].value_counts())):\n",
" #为dataframe中D的每一个取值拆分出叫做temp的,包含该取值的dataframe\n",
" temp=daframe[daframe[D]==coun.index[i]]\n",
" for j in spec:\n",
" if j!=D:\n",
" for k in iris[j].value_counts().index:\n",
" #print('k=',k,px(temp,j))\n",
" try:\n",
" frame.loc[k,iris[D].value_counts().index[i]]=px(temp,j)[k]\n",
" except:\n",
" print('error happened\\n',px(temp,j),'\\n',k)\n",
" print(iris[D].value_counts().index[i])\n",
" #pd.Series(r)\n",
" #print(\"__________\\n\",r,\"\\np({}|{})\".format(j,iris[D].value_counts().index[i]))\n",
" #print(ent(temp,D))\n",
" #print(gain)\n",
"print(frame)"
]
},
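{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `error happened` branches above fire whenever a value never occurs inside the current class block: its conditional probability is 0, so the key is missing from `px(temp, j)`. Below is a minimal sketch (not in the original code) of an alternative that avoids the `try/except` entirely: `Series.reindex` over all values seen in the full data fills the missing entries with 0 directly. It reuses the `iris` and `spec` names above and the `px` helper defined a few cells below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"D = 'status'\n",
"for cls in iris[D].value_counts().index:\n",
"    temp = iris[iris[D] == cls]\n",
"    for j in spec:\n",
"        if j != D:\n",
"            # values absent from this class block get probability 0 via fill_value\n",
"            cond = px(temp, j).reindex(iris[j].value_counts().index, fill_value=0)\n",
"            print('p({}|{}={}):'.format(j, D, cls))\n",
"            print(cond)"
]
},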
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 计算p(x)"
]
},
{
"cell_type": "code",
"execution_count": 213,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T14:37:22.487391Z",
"start_time": "2019-04-29T14:37:22.478248Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Junior 0.545455\n",
"Senior 0.454545\n",
"Name: status, dtype: float64\n"
]
},
{
"data": {
"text/plain": [
"0.5454545454545454"
]
},
"execution_count": 213,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def px(daframe,colum):\n",
" \"\"\"输入dataframe以及需要计算熵的列索引,输出熵\"\"\"\n",
" coun=daframe[colum].value_counts()\n",
" rat=coun/sum(coun)\n",
" return rat\n",
"print(px(iris,'status'))\n",
"px(iris,'status')['Junior']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 计算相对概率"
]
},
{
"cell_type": "code",
"execution_count": 212,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T14:35:39.113525Z",
"start_time": "2019-04-29T14:35:39.018620Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Junior | \n",
" Senior | \n",
"
\n",
" \n",
" \n",
" \n",
" | Systems | \n",
" 0.333333 | \n",
" 0.4 | \n",
"
\n",
" \n",
" | Sales | \n",
" 0.333333 | \n",
" 0.2 | \n",
"
\n",
" \n",
" | Marketing | \n",
" 0.166667 | \n",
" 0.2 | \n",
"
\n",
" \n",
" | Secretary | \n",
" 0.166667 | \n",
" 0.2 | \n",
"
\n",
" \n",
" | Junior | \n",
" 0.000000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | Senior | \n",
" 0.000000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 3135 | \n",
" 0.333333 | \n",
" 0.4 | \n",
"
\n",
" \n",
" | 2630 | \n",
" 0.500000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 2125 | \n",
" 0.166667 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 4650 | \n",
" 0.000000 | \n",
" 0.2 | \n",
"
\n",
" \n",
" | 3640 | \n",
" 0.000000 | \n",
" 0.2 | \n",
"
\n",
" \n",
" | 4145 | \n",
" 0.000000 | \n",
" 0.2 | \n",
"
\n",
" \n",
" | 46K50K | \n",
" 0.333333 | \n",
" 0.4 | \n",
"
\n",
" \n",
" | 66K70K | \n",
" 0.000000 | \n",
" 0.4 | \n",
"
\n",
" \n",
" | 26K30K | \n",
" 0.333333 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 31K35K | \n",
" 0.166667 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 36K40K | \n",
" 0.000000 | \n",
" 0.2 | \n",
"
\n",
" \n",
" | 41K45K | \n",
" 0.166667 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Junior Senior\n",
"Systems 0.333333 0.4\n",
"Sales 0.333333 0.2\n",
"Marketing 0.166667 0.2\n",
"Secretary 0.166667 0.2\n",
"Junior 0.000000 0.0\n",
"Senior 0.000000 0.0\n",
"3135 0.333333 0.4\n",
"2630 0.500000 0.0\n",
"2125 0.166667 0.0\n",
"4650 0.000000 0.2\n",
"3640 0.000000 0.2\n",
"4145 0.000000 0.2\n",
"46K50K 0.333333 0.4\n",
"66K70K 0.000000 0.4\n",
"26K30K 0.333333 0.0\n",
"31K35K 0.166667 0.0\n",
"36K40K 0.000000 0.2\n",
"41K45K 0.166667 0.0"
]
},
"execution_count": 212,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def pxy(daframe,D):\n",
" \"\"\"\n",
" 输入dataframe和准备预测的对象所在列\n",
" 输出为一个dataframe\n",
" 其中列是预测所在列的所有元素\n",
" 行是 该dataframe所有变量所在\n",
" \"\"\"\n",
" #D='status'\n",
" #daframe=iris\n",
" indextp=[]\n",
" for j in spec:\n",
" for i in daframe[j].value_counts().index:\n",
" indextp.append(i)\n",
" frame = pd.DataFrame(np.zeros([len(indextp), len(iris[D].value_counts())]), \n",
" columns=daframe[D].value_counts().index, \n",
" index=indextp)\n",
"\n",
"\n",
"\n",
" for i in range(len(iris[D].value_counts())):\n",
" #为dataframe中D的每一个取值拆分出叫做temp的,包含该取值的dataframe\n",
" temp=daframe[daframe[D]==coun.index[i]]\n",
" for j in spec:\n",
" if j!=D:\n",
" for k in iris[j].value_counts().index:\n",
" #print('k=',k,px(temp,j))\n",
" try:\n",
" frame.loc[k,iris[D].value_counts().index[i]]=px(temp,j)[k]\n",
" except:\n",
" #print('error happened\\n',px(temp,j),'\\n',k) 如果条件概率为0会发生报错,特此修复\n",
" pass\n",
" #print(iris[D].value_counts().index[i])\n",
" #pd.Series(r)\n",
" #print(\"__________\\n\",r,\"\\np({}|{})\".format(j,iris[D].value_counts().index[i]))\n",
" #print(ent(temp,D))\n",
" #print(gain)\n",
" return frame\n",
"prob=pxy(iris,'status')\n",
"prob"
]
},
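{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note the zero entries in `prob` (for example `66K70K` under `Junior`): any input containing such a value drives the whole product, and hence the posterior, to exactly 0, however strongly the remaining attributes point to that class. This zero-frequency problem is what the Laplace-smoothing sketch at the end of the notebook addresses."
]
},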
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T13:16:53.809199Z",
"start_time": "2019-04-29T13:16:53.714Z"
}
},
"source": [
"## 开始计算"
]
},
{
"cell_type": "code",
"execution_count": 266,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T14:54:52.069428Z",
"start_time": "2019-04-29T14:54:52.066535Z"
}
},
"outputs": [],
"source": [
"#输入\n",
"x=pd.Series({'department':'Marketing','age':3135,'salary':'26K30K'})"
]
},
{
"attachments": {
"image.png": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAABbCAYAAADQplYeAAAVZklEQVR4Ae2dTcxfwxfH7/PPf0ks2oWXjbbUiqBbG1oJoYKgItHaCRbES9SKnUgrsamEVUvCgoUoIYpY2PYhrLyvWhIstNaefz7X//xynnlm5s7c95czyfPc+5s7c86Z77n33JlzZ+asbWxsbBSWDAFDwBAwBDpHYG1tbe0/nXMxBoaAIWAIGAIrBMzorqCwE0PAEDAEukfAjG73GBsHQ8AQMARWCJjRXUFhJ4aAIWAIdI+AGd3uMTYOhoAhYAisEDCju4LCTuaMwFdffVVcddVVBUdLhsCQCJjRHRL9ifB++eWXi7W1taQ/yqYmykr5Dz74IIn+vn37ir///rv49ddfi127dgXrQK/tFOMp/JDtwIEDQeOu21xHPuqn6EJwTeWRIxc4XHHFFV45Tp48WbIEh/vvvz+IA4VyeKa2YxLlmKdryRBIQeDgwYPM6S7/ONfp/PnzG3v37i2v7dy5c+Ps2bP68qbz9fX1jW3btm3EaAgfOUIbHm4SWlLu5MmTbpHyN+V27969wTE3HT16dNVuzt0ET+HPMSQr9QSnKoxcHu7vkC6EPnKk8ACP7du3b9GFy4/fKTisra2tsACHc+fO+UiVeci6b9++Us5goZldKF8KM2uTNadDBLRx8RkfDC0PuhgenyhCI2QcNY0UQyZGBiMeM6h1jC51oIsc7gvCbZuWu6osdSlTJbPLQ/8WHJHN1YWWJfYCgAZG8v3339ekvedgkYrDrl27VmX/+ecfLz2deejQoY3Tp0/rrNmem9GdrWq7aZj0dGLGQnpglHGTGDHXSITKaaMbeuAxHDF5hHau0ZW2+oya0HSPUif0QnHLNzG8wivUdq0H2u4m8ujhVumCesIrpawun2LMRS7asQTDa0ZXNG7HSgR074kHOpTkYcdY6ST5qQZJerCu4ZUhs1yPyaL55xhdDAB8Y71ETVvO4XHfffd53SBSxj2KQUttB/VTdCF40w4Xc7lWZRTFMAsWruyh34JDzLXg1gUHet3IltI7dutP5bcZ3aloagRy8uCKAXQfYhFPGwOMoyQeQgyZGEzJrzpKPeErRx5M/nKMIrRSfLoYc/iEepAxmWn/Cy+8ECuy5ZpglsOvShdCk3a4mAum5J85c2aLPJIBDvhbBYucXmhdHHBLgEMOL5F3KkczulPR1AjklB6Z+xBr0aQMDyrnkqRnxTE3aZrQFSMQk8PHI9XoCr86svr4VuVJj512pfIUGUMYyHVXD8iSqgtNI1WuqrbGrmsjD7+59nYxujZlrHz12L8YAkz/+eijj8oiN9xwQ3HJJZdsKc781xdffLHM37t3b/Hwww+X5+R/+OGH5fk111yzpV5VBnSgp9Off/5ZvPvuu145dLncc93Oe++9N7d6rfIXXHBBceutt5Z1v/zyy3IqXIyQltGni5AeoJmqC3h8/PHHKzHuueee1XlXJ+Bwyy23lNPQUnDoSo4+6JrR7QPlifP44Ycfiq+//rpshc9wHjp0qLj++usLjOHBgweLTz/9tOAhIp05c6bM55zFCbkJOkeOHCm2bdu2qeorr7yy6XcbP7744ovis88+K3bu3Fns2bOnDZJJNASXn3/+uTh9+nS0TpUuQnqAaKouBAfq9IlFDg5RkEZ+0YzuyBU0BvH0w/r0009vmRT/xhtvlMaWrZlPnDixSeR33nmn/I3RvOyyyzZdS/1x3XXXFc8999ym4vCUBQmbLjT48d1335W1fT3IBmQrq4KLvFQEr1ClKl3w0vPpAXpCu0oX4CDbbIPFpZdeGhKn1XyNAyMZkaFVJiMgZkZ3BEoYuwjysDLMP3/+fPkw8EDoP9fY0iaGqRiJNpLPzfD4449XDsdzeH/zzTdlcV9vPodOl2WrdOHTA/Lk6EJwoN6YsegS5y5pm9HtEt0Z0GbJJz42Ej0RcRvkNu2iiy4qLr744txqq/KvvfZaOfRfZRRFwXD8rbfe0lm1z7VRkmFubWINKvKSQhZf6kMX8D579uyK/e7du1fnfZ7EcOhTji54mdHtAtUZ0fztt9+Kv/76q2xRXx+XXPhwI/CRbn19vTh69Oimy+TzgWjKiZcRL6WqNAZdVMnY5HoqDk14jKGuGd0xaGHEMnz++eflh7AqP2BXTaB3hxvh+PHjBb5d183Ax7tnnnkm2DtMlYsevPicxbebWrevcn3oAhy0D/f777/vq3mL4WNGdzGqzm8oQ02ZKnbttdcWV155ZT6R/9egt0xPLSfB/8EHHyweffTR4vbbby+rymwGTYcZB7gfmibxX2qfZlOaKfV1DzZUvk9dCA7I0icWKTiE8JlSvhndKWmrZ1n19KQ6/lzde6wj+mOPPVb2Pp966qlN1enxuqkNN8MDDzxQTpHKnSeKQYxt5+jKGvsdwrlPXYAD22aSwEL7eGOyg0PVdo6x+vpaCAddZqrnZnSnqrke5JbhLKzq+nOlHm6AnJkM7LVK+WPHjnlb6lsw0dTNwKIPetW5H+ief/754vDhw6X7wytsRSbtBB+S4OVW6VMX4PDII4+UIoDF22+/7Yrj/Q0Ozz77bCs4sCCDfYNnmWJL8+zachHQy1NDy01T0NH7ALC0NCXJEtTQHg/QkD0EZGmwHEM8UpcB026hFeOPDIJRrJxuP3tF+JK0N4Sz8EGuUBkfXTdPyxLCSerAU/ZegG9scxwpW4WDbPkIDr7NcJCJTW9oY2xfCJFxikdeItbTneWrtHmjfFO06lCl18QEexL+YYagocQ1IkOwAIPEqrNYeR8d6uZGTdB0cImwwIC0f//+cpjNxzw3wePyyy8vV8uJv9ktw+8LL7yw2LFjR3lJPtS55cRvGlqU0ZUumHMdSuBw6tSpFRZ33HGH180gOLz00ksrv7uPposD9N307bfflnO/Qzi45Sf7e4pvC5O5GwR0j0p6e/qYsxOWllB6pbFemmzEovnJuVsPOeVa6OjWSe3parljMlX1FDWd2Ln0Pl1s+9BFTm8yhEWbONguY7E7xa4ZApkIyEPb1kOayb50R6Rs7ZhLt2l5huS8OMCnrzS0LnztBAfbT9eHzELy6vSKFgJN7WZKj87tgdYmmFlxjDqV3mzfmGhd5PR2MyHPKi6x0sYiT5bwGYVxidT26eLLSYlKSpkcHxtlc8rjbwtFhZUNUfALtrlqCbpVbZeotYAckxE60Mtt99T8Wfh22cSE+bp86V9CEh81OmYnNjfhq2X3ti62qXR56d+ii3Pnzm3ZSEiXa+scHG6++ebymQEH30Y2PJ/sK6EXZrTFf3R0Moy0t6gMVXxDJHmTcy3lbY5fK3WYxTBV/Hm+IasM26QMsqSmlF6Rbpvw4BiLZgBd2ijl9ddeoZeCU2o7xlhOMEjVc1ttSNFpW7z0venTNXykjL4H2uKfSgdMUiMBp9LU5aSNggFH3ywIyvnyNa25nJcvgKaN0cYN8NwkQxkAjxkk6KTcgPLQQq/qwdW8Xbliv1MfUE1fbqxYG8Wwuh9NtCy0KXZdl53queAWw6rtt
qXqtE2+cq+6L1J07Oa1yTeHFrrgAxa68E3jyqEVKgsOGHfa7LoPiARM/lISRre2e0G67LJOnbX5N910k2SvjnrKEEMpVta4iaHFQw89FJ1yQh2G4LJJMxufhLaxE/rwZrJ7V0mGaZo+S1JZSeVLbA4NBkzF8a2qog5tuu2228rhWJsuEZ88Q+WB208//VRGTGhj+e5Q7ajiKwsa9BQo7mGW2dJ+cBg6IcOPP/5Y6uL111/vRBxw+OOPP8qpg7rNYHH11VeXWHTCeKxEm75heGtX9WL1MMPtzepeT0wWTcPXow7VlR5O6LovX+pwTEnSy9XHUDvdfB99wWRJPQAfDlPOk1EN90SKzqfc1pjs4CCLLJbiQojhUb4HYgWqrnEziaEJ3VhiQCjnDqkwagyl3XzNV9+8dYbd8M+N0JprdKUdgoUcxf3BMXcoLbilGn6NmZ0Pj4B0EuQeGF6iYSTQOMw12GQOshjdRu4FcS3E4iixyTTrt0kM9fXwghVHrDnXw6+yoPqnV+Mw7A4Ny1WVTafwY014lwmZ3HAy8CMgIxuAsGnIm2++mbUBuKzg6SIWWJdYGO1/ozTI7myhvRSWgBOzFiTA5az3UshUZm2jC6ByY4WMJj5JX4RYZORaVZRYzYM6Y76B3X1ekZcXCkEa60wJYpkkUWJzd7zK1L8V7wABltf+8ssvZdyz0NLfDtiOjqTh4FdJbaOrt5rT+28Km1iEWMronZVC4VGGikoqbcg5YiR9UWsxvHV7q+DCKKEqQmyOnFa2ewTQF3prugdx95J2y8Fw8ONb2+hqo5kbIRZRJMBeLCKBuC8oH+pN+5s1TG7IzVA3cq30kgQrt1UpizRCizi2b9/e6oIRV7Yl/xZ9MVLxbeyyFGwkoi844C6z9C8CtY2u3Fi5EWJhi9sAo12VZPclyvl601X1h7juczMgR9uRa6HJ7lY6Im/OOVN4cv3jQ+A5NZ6sPsQlpKdQMupjetSSksbhxhtvLJvOtNCl4eDTeS2jK4BCsOkO76Eosa5hDrkgfI0aMk9/+NNyMNysG7mWFxR4WBo/AjrkDDpjOTjfItzoF+NvSTMJfTjwMW1pOPhQ/K8vsypPAzrmj1tV7Wj7OsN9iVrLhHDZF1b4cI0FJKk9zCYhy4WnHYdBAF/+XXfdFV0IM4xk/XIFh7vvvrv45JNPyoVN/XIfJ7daPV1ZaRPzxzZtLr4w8WlCS/t3m9Luoj69/1jUWnhyAzYNKdOF7EazPQR4oeK6wdWzZBcOOPz+++8rHFhJaulfBLKNLkMmmSrWxtfZWJRY7cfV/t2xKQ9MQlFreTHplBO5lhGFJUPAEJgXAtlGV08Vq+vPdXuxIUglOivXc+erYgjbitAakk/yY1FrfYsmciPXhnC22QuiATsaAhNCIGcJG2VlWR9LXUNLf1Nopiwhdvnl7Lnw5JNPltECUmRxy+QuA44t8dXLmGV5MMdYHZFHMGqCs9BKOWr57LxYLXE3LIbFYk7Lh3k1ZPd0xbUQW/qb8s7Zs2dPAQ1SzF+rp2DxYYreXSzJF2Omqfg+WOF7lU3P9UbjMZqxa0yBeeKJJ4LzMUOLJlLcDODSFOeY7O61nClnVnaj9nQ9wy4PO+aazyllG12MRRuJPRFY8EDCkGMsfQmjxVLatiK0yp4G8AoN231yuHli3HkR1IlaCz3qxuYt4seewqIQFxv7bQgYAhEEUoadoSEyw646O38JT9mdK7bLmJSVo2wl6Rvy5bgfhJ7vWOVeCMngtiOGm5bfrYdMtsuYTzPt5GkXmdaDe557P1Fe6ohryKWpf2sXk+hbX9fn0NP020HCqPSNQGmK+2bq8hMDJjere32I31VGtw+Z5KHtg9dSeci9h3HjXJJ+WfpeiFJOjtJ50DS4puloA6qNrdCQo9CS8tqfL/RSZBJ6dhwXAqMwuvKG50YaSxra6OqHayyYzFEOebFh4NyXvtyXXIsZSaGhjaPGStMRQxqjJ7qPjSAx7rHrmr+djwsBjG62Tzfiqah1Cd+uRImtRSCjkvhhQ9FZM0h1WlRHie2U0cKJywdcvU+CQKK/OVSFmSJ0FPtg+JLc33q+toV08iG1oLyxvAfoXbrDszZlk16GHEM9E3gO2dOlx4WMMfnaxGXJtLjfwDrU8xRd+PQhPdhQXRdXTSt0DwrNFN1LWXM1uEiP+zevlsF7uvJ+Y3oXCyDamMYlNPWRaTrr6+vl7k99TsPSMlSdsxvVq6++Wpw9ezbYc6qiYdfTEEjZtCm2CvLw4cPlnrmx6YJaEj31UfL1znOMwljVyGyVUK9Z6nGkB00kFjZSQhZL00FgNEYXyLqOECt7RoxxGhZTx8YUJXY6t3A9SWWDbWr7Nm3SRtl9SUvUEzc/JonM19ZltMGUCNLHjh3TRaLnsmKTCCxzjRwdBWCqF8fdGW9POvlA4RsqtsfFKE0FARnuh4bncp37hXOdxC1Rxx0mrgU58kHswIED0eCsmrc+1/d0HVk0LTvvB4HyPdEPq+G5yENkN+fwuhhaAj0tyzWoyCYG0Xev4G+V6ym+V7etmrfQ4ejj5db1/W4qj4+m5XWHAEZ3VO6FLkcLsnzZN5Tskq/RHh8CVaGmWP2I7T1x4sQW4SViSt1tTfl24dsEqUlIJ5kZIbJtEdoyRoXAYoyuRWcd1X03qDBinEKhpnzGFoH52JUSZqqqcb6PatTRH9aqaNj16SKwGKNr0Vmne5O2Kbn+QNZk741QmKkUWbsI6QTfNl4IKfJbmWYILMboAtPSo7M2u1XmUXvoUFM6pBOLKtyUu9cyIZ14AViaDgKLMLr0bvSqoyVGZ53OLdmtpDJtsK5Ptql0FtKpKYLTr78Ioythb2QZ8BKjs07/Vm3eAvQvH1SbhpqKhZkKSQp/FjTI4geZuysfwqReyl7LUlb33CXPjuNGYBFGFxVIdNYjR46sbvpxq8akaxuBpqGmMJL4gesmFkC4IchDsxly3QzI1ES2um2yevkILMLocmMzBWjJ0Vnzb4351RDXAi2rO3VQ6vESz/lwxYrDUHnfbIbUyNHQpGyTNs1P0+Nu0SKM7rhVYNL1gYB2LeQs33VlSw0zpethcIkSwj4NvtTEzSA7pTVpk08my+sOATO63WFrlEeEQGiaVq6IesvHWJgp6Mo3BAwuibBOuSklpBM0x7ifSG5bF1O+uwVvRtkQGBYBvTeBXnIr53U3ApelvKF9G2g1y3qFjz66dapklLpuPXjI9o512zGsdpbJnRfLGk1fzBvGGmoItIQA0w5ZustcW/fjWEssKskw53f//v1l0NbQKrpKIlagVwTWSGZ0e8XcmM0EAeZ+M6QnsQ80boc+E66LO++8s2B5+xD8+2zrnHhhc82nOyeNWlt6Q0DC8DBfd4hNxHVIp74Nfm8gz5SRGd2ZKtaa1T0CTEU8depUwSbiuBv6SjIb4vjx4wUyWJoWAuZemJa+TNoRIiCuhh07dhTvvfdewRSwrhLGHXeCuRS6QrhbuuZe6BZfo74QBBjedx1qCigtpNM8bijr6c5Dj9YKQ8AQmAAC1tOdgJJMREPAEJgX
AvYhbV76tNYYAobAyBEwoztyBZl4hoAhMC8EzOjOS5/WGkPAEBg5AmZ0R64gE88QMATmhYAZ3Xnp01pjCBgCI0fAjO7IFWTiGQKGwLwQMKM7L31aawwBQ2DkCJjRHbmCTDxDwBCYFwJmdOelT2uNIWAIjBwBM7ojV5CJZwgYAvNCwIzuvPRprTEEDIGRI2BGd+QKMvEMAUNgXgiY0Z2XPq01hoAhMHIEzOiOXEEmniFgCBgChoAhYAgYAoZATQT+B8zqtWWLMhD1AAAAAElFTkSuQmCC"
}
},
"cell_type": "markdown",
"metadata": {},
"source": [
"## "
]
},
{
"cell_type": "code",
"execution_count": 249,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T14:50:16.089687Z",
"start_time": "2019-04-29T14:50:16.086748Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'age'"
]
},
"execution_count": 249,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.index[0]"
]
},
{
"cell_type": "code",
"execution_count": 276,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T14:56:40.796120Z",
"start_time": "2019-04-29T14:56:40.791778Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0.36363636363636365"
]
},
"execution_count": 276,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"px(iris,x.index[0])[3135]"
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T14:29:31.033580Z",
"start_time": "2019-04-29T14:29:31.029264Z"
}
},
"source": [
"###### P\\left(C_{i} | \\mathbf{X}\\right)=\\frac{P\\left(\\mathbf{X} | C_{i}\\right) P\\left(C_{i}\\right)}{P(\\mathbf{X})}"
]
},
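{
"cell_type": "markdown",
"metadata": {},
"source": [
"Under the naive conditional-independence assumption the likelihood factorizes, and the evidence $P(\\mathbf{X})$ is approximated in the next cell by the product of the marginal frequencies, so the quantity actually computed is\n",
"\n",
"$$P(C_i \\mid \\mathbf{X}) \\approx P(C_i)\\prod_k \\frac{P(x_k \\mid C_i)}{P(x_k)}$$\n",
"\n",
"Because the denominator is itself an approximation, the two scores printed below need not sum to 1; they are only compared against each other."
]
},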
{
"cell_type": "code",
"execution_count": 286,
"metadata": {
"ExecuteTime": {
"end_time": "2019-04-29T15:18:27.207818Z",
"start_time": "2019-04-29T15:18:27.196240Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8402777777777776\n",
"0.0\n"
]
}
],
"source": [
"\n",
"for j in iris['status'].value_counts().index:\n",
" p=px(iris,'status')[j]\n",
" for i in range(len(x)):\n",
" p*=prob.loc[x[i],j]/px(iris,x.index[i])[x[i]]\n",
" print(p)\n",
" #可见接下来需要解决的是laplace平滑"
]
}
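,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of add-one (Laplace) smoothing for the conditional table, reusing the `iris` and `spec` names above (`pxy_laplace` is a hypothetical helper, not part of the original code): each conditional probability becomes $(\\mathrm{count}+1)/(n_c+V_j)$, where $n_c$ is the size of the class block and $V_j$ the number of distinct values of attribute $j$, so no entry is ever exactly 0."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def pxy_laplace(daframe, D):\n",
"    \"\"\"Conditional-probability table with add-one (Laplace) smoothing.\"\"\"\n",
"    classes = daframe[D].value_counts().index\n",
"    indextp = [v for j in spec if j != D\n",
"               for v in daframe[j].value_counts().index]\n",
"    frame = pd.DataFrame(np.zeros([len(indextp), len(classes)]),\n",
"                         columns=classes, index=indextp)\n",
"    for c in classes:\n",
"        temp = daframe[daframe[D] == c]\n",
"        for j in spec:\n",
"            if j != D:\n",
"                counts = temp[j].value_counts()\n",
"                V = len(daframe[j].value_counts())  # distinct values of attribute j\n",
"                for k in daframe[j].value_counts().index:\n",
"                    # add-one smoothing: (count + 1) / (n_c + V), never exactly 0\n",
"                    frame.loc[k, c] = (counts.get(k, 0) + 1) / (len(temp) + V)\n",
"    return frame\n",
"\n",
"pxy_laplace(iris, 'status')"
]
}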
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "255px"
},
"toc_section_display": true,
"toc_window_display": true
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"position": {
"height": "319px",
"left": "742px",
"right": "20px",
"top": "15px",
"width": "350px"
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}