{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2021-11-09T03:40:59.657176Z",
"iopub.status.busy": "2021-11-09T03:40:59.656913Z",
"iopub.status.idle": "2021-11-09T03:40:59.660859Z",
"shell.execute_reply": "2021-11-09T03:40:59.660154Z",
"shell.execute_reply.started": "2021-11-09T03:40:59.657147Z"
},
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2021-11-09T03:41:00.023002Z",
"iopub.status.busy": "2021-11-09T03:41:00.022740Z",
"iopub.status.idle": "2021-11-09T03:41:00.033547Z",
"shell.execute_reply": "2021-11-09T03:41:00.032810Z",
"shell.execute_reply.started": "2021-11-09T03:41:00.022977Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"('1.3.2', '1.21.2')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.__version__, np.__version__"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2021-11-09T03:41:00.537187Z",
"iopub.status.busy": "2021-11-09T03:41:00.535145Z",
"iopub.status.idle": "2021-11-09T03:41:00.580259Z",
"shell.execute_reply": "2021-11-09T03:41:00.579517Z",
"shell.execute_reply.started": "2021-11-09T03:41:00.537094Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
"
\n",
" \n",
" 2 | \n",
" anna | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country\n",
"0 john USA\n",
"1 david UK\n",
"2 anna NaN"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# needs two steps\n",
"# one to assign the dataframe to a variable\n",
"df = pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country':['USA','UK',np.nan]\n",
"})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2021-11-09T03:41:01.322087Z",
"iopub.status.busy": "2021-11-09T03:41:01.321370Z",
"iopub.status.idle": "2021-11-09T03:41:01.347716Z",
"shell.execute_reply": "2021-11-09T03:41:01.346825Z",
"shell.execute_reply.started": "2021-11-09T03:41:01.322012Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country\n",
"0 john USA"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# another one to perform the filter\n",
"df[df['country']=='USA']"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country\n",
"0 john USA"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country':['USA','UK',np.nan]\n",
"}).query(\"country == 'USA'\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## query where is null"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 2 | \n",
" anna | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country\n",
"2 anna NaN"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country':['USA','UK',np.nan]\n",
"})\n",
"\n",
"df.query('country.isnull()')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## query with python variable"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
" 45 | \n",
"
\n",
" \n",
" 2 | \n",
" anna | \n",
" NaN | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country age\n",
"1 david UK 45\n",
"2 anna NaN 45"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"df = pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country':['USA','UK',np.nan],\n",
" 'age':[23,45,45]\n",
"})\n",
"\n",
"target_age = 45\n",
"\n",
"df.query('age == @target_age')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## OR operator"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
" 23 | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
" 45 | \n",
"
\n",
" \n",
" 2 | \n",
" anna | \n",
" USA | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country age\n",
"0 john USA 23\n",
"1 david UK 45\n",
"2 anna USA 45"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country':['USA','UK', 'USA'],\n",
" 'age':[23,45,45]\n",
"})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
" 23 | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country age\n",
"0 john USA 23\n",
"1 david UK 45"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query(\"(name=='john') or (country=='UK')\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## AND operator"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
" 23 | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
" 45 | \n",
"
\n",
" \n",
" 2 | \n",
" anna | \n",
" USA | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country age\n",
"0 john USA 23\n",
"1 david UK 45\n",
"2 anna USA 45"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country':['USA','UK', 'USA'],\n",
" 'age':[23,45,45]\n",
"})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
" 23 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country age\n",
"0 john USA 23"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query(\"(country=='USA') and (age==23)\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Column is in array"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
" 23 | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
" 45 | \n",
"
\n",
" \n",
" 2 | \n",
" anna | \n",
" USA | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country age\n",
"0 john USA 23\n",
"1 david UK 45\n",
"2 anna USA 45"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country':['USA','UK', 'USA'],\n",
" 'age':[23,45,45]\n",
"})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
" 23 | \n",
"
\n",
" \n",
" 2 | \n",
" anna | \n",
" USA | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country age\n",
"0 john USA 23\n",
"2 anna USA 45"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"names_array = ['john','anna']\n",
"\n",
"df.query('name in @names_array')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Column is not in array"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
" 23 | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
" 45 | \n",
"
\n",
" \n",
" 2 | \n",
" anna | \n",
" USA | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country age\n",
"0 john USA 23\n",
"1 david UK 45\n",
"2 anna USA 45"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country':['USA','UK', 'USA'],\n",
" 'age':[23,45,45]\n",
"})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
" 23 | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country age\n",
"0 john USA 23\n",
"1 david UK 45"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"invalid_array = ['anna']\n",
"\n",
"df.query('name not in @invalid_array')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Column name with spaces"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country of birth | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
" 23 | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
" 45 | \n",
"
\n",
" \n",
" 2 | \n",
" anna | \n",
" USA | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country of birth age\n",
"0 john USA 23\n",
"1 david UK 45\n",
"2 anna USA 45"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country of birth':['USA','UK', 'USA'],\n",
" 'age':[23,45,45]\n",
"})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country of birth | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country of birth age\n",
"1 david UK 45"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query('`country of birth` == \"UK\"')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Where column is null"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
"
\n",
" \n",
" 2 | \n",
" anna | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country\n",
"0 john USA\n",
"1 david UK\n",
"2 anna NaN"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"df = pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country':['USA','UK',np.nan]\n",
"})\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 2 | \n",
" anna | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country\n",
"2 anna NaN"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query('country.isnull()')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Where column is not null"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
"
\n",
" \n",
" 2 | \n",
" anna | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country\n",
"0 john USA\n",
"1 david UK\n",
"2 anna NaN"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"df = pd.DataFrame({\n",
" 'name':['john','david','anna'],\n",
" 'country':['USA','UK',np.nan]\n",
"})\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" john | \n",
" USA | \n",
"
\n",
" \n",
" 1 | \n",
" david | \n",
" UK | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name country\n",
"0 john USA\n",
"1 david UK"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query('country.notnull()')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## like"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" col1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" foo | \n",
"
\n",
" \n",
" 1 | \n",
" bar | \n",
"
\n",
" \n",
" 2 | \n",
" baz | \n",
"
\n",
" \n",
" 3 | \n",
" quux | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" col1\n",
"0 foo\n",
"1 bar\n",
"2 baz\n",
"3 quux"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'col1':['foo','bar','baz','quux']\n",
"})\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" col1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" bar | \n",
"
\n",
" \n",
" 2 | \n",
" baz | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" col1\n",
"1 bar\n",
"2 baz"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query('col1.str.contains(\"ba\")')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"toc-autonumbering": false,
"toc-showcode": true
},
"nbformat": 4,
"nbformat_minor": 4
}