{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Given two DataFrames `df1` and `df2`, each with a key column `a` and a value column `b`, the goal is to build a new DataFrame in which the `b` value for each key in `a` is taken from:\n", "* `df1` if the key exists only in `df1`\n", "* `df2` if the key exists in both `df1` and `df2`\n", "* `df2` if the key exists only in `df2`" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "df1\n", " a b\n", "0 1 18\n", "1 2 19\n", "2 3 20\n", "3 4 21\n", "4 5 22\n", "df2\n", " a b\n", "0 5 23\n", "1 4 24\n", "2 6 25\n" ] } ], "source": [ "import pandas as pd\n", "df1 = pd.DataFrame({'a': [1,2,3,4,5], 'b': [18, 19, 20, 21, 22]})\n", "print('df1')\n", "print(df1)\n", "df2 = pd.DataFrame({'a': [5,4,6], 'b': [23, 24, 25]})\n", "print('df2')\n", "print(df2)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | a | \n", "b_x | \n", "b_y | \n", "
---|---|---|---|
0 | \n", "1 | \n", "18.0 | \n", "NaN | \n", "
1 | \n", "2 | \n", "19.0 | \n", "NaN | \n", "
2 | \n", "3 | \n", "20.0 | \n", "NaN | \n", "
3 | \n", "4 | \n", "21.0 | \n", "24.0 | \n", "
4 | \n", "5 | \n", "22.0 | \n", "23.0 | \n", "
5 | \n", "6 | \n", "NaN | \n", "25.0 | \n", "