{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Given two dataframes `df1` and `df2`, each with a key column `a` and a value column `b`, the goal is to build a new dataframe with one row per key `a`, taking the value of `b` from\n", "* `df1` if the key exists only in `df1`\n", "* `df2` if the key exists in both `df1` and `df2`\n", "* `df2` if the key exists only in `df2`\n", "\n", "In other words, `df2` takes precedence and `df1` only fills in the keys that `df2` lacks." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "df1\n", " a b\n", "0 1 18\n", "1 2 19\n", "2 3 20\n", "3 4 21\n", "4 5 22\n", "df2\n", " a b\n", "0 5 23\n", "1 4 24\n", "2 6 25\n" ] } ], "source": [ "import pandas as pd\n", "df1 = pd.DataFrame({'a': [1,2,3,4,5], 'b': [18, 19, 20, 21, 22]})\n", "print('df1')\n", "print(df1)\n", "df2 = pd.DataFrame({'a': [5,4,6], 'b': [23, 24, 25]})\n", "print('df2')\n", "print(df2)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
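<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>a</th>\n",
"      <th>b_x</th>\n",
"      <th>b_y</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>1</td>\n",
"      <td>18.0</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>2</td>\n",
"      <td>19.0</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>3</td>\n",
"      <td>20.0</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>4</td>\n",
"      <td>21.0</td>\n",
"      <td>24.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>5</td>\n",
"      <td>22.0</td>\n",
"      <td>23.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>5</th>\n",
"      <td>6</td>\n",
"      <td>NaN</td>\n",
"      <td>25.0</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>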
\n", "
" ], "text/plain": [ " a b_x b_y\n", "0 1 18.0 NaN\n", "1 2 19.0 NaN\n", "2 3 20.0 NaN\n", "3 4 21.0 24.0\n", "4 5 22.0 23.0\n", "5 6 NaN 25.0" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Outer-join on key 'a': df1's values land in b_x, df2's in b_y (NaN where a key is absent).\n", "merged = pd.merge(df1, df2, how='outer', on='a')\n", "merged" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "df3\n", " a b\n", "0 1 18.0\n", "1 2 19.0\n", "2 3 20.0\n", "3 4 24.0\n", "4 5 23.0\n", "5 6 25.0\n" ] } ], "source": [ "# Take df2's value (b_y) where it exists, otherwise fall back to df1's (b_x).\n", "# Built as a new frame so `merged` stays intact and this cell can be re-run safely.\n", "df3 = (\n", "    merged\n", "    .assign(b=merged['b_y'].fillna(merged['b_x']))\n", "    .drop(columns=['b_x', 'b_y'])\n", ")\n", "print('df3')\n", "print(df3)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 2 }