import pandas as pd


def get_df():
    """Return a fresh copy of the demo frame used by every example below.

    The frame has two string columns:
      * ``col``  - a numeric code and a place name separated by whitespace
      * ``type`` - one of 'country', 'state' or 'county'

    A new DataFrame is built on every call, so each example can modify its
    own copy without affecting the others.
    """
    rows = [
        ('00000 UNITED STATES', 'country'),
        ('01000 ALABAMA', 'state'),
        ('01001 Autauga County, AL', 'county'),
        ('01003 Baldwin County, AL', 'county'),
        ('01005 Barbour County, AL', 'county'),
    ]
    return pd.DataFrame(rows, columns=['col', 'type'])


# Basic case: keep the original column and add the two pieces next to it.
# n=1 splits on the first run of whitespace only, so multi-word names stay whole.
df = get_df()
print(df)
code_and_name = df['col'].str.split(n=1, expand=True)
df[['A', 'B']] = code_and_name
print(df)


# If the original column is to be removed, there are two approaches.

# Method 1: use pop(), which removes 'col' from the frame and hands back its
# values, so the split result is all that remains of it.
df = get_df()
print(df)
popped_col = df.pop('col')
df[['A', 'B']] = popped_col.str.split(n=1, expand=True)
print(df)
# Method 2: add the new columns first, then drop the original column.
df = get_df()
print(df)
# n=1 splits on the first run of whitespace only: code -> 'A', name -> 'B'.
df[['A', 'B']] = df['col'].str.split(n=1, expand=True)
# DataFrame.drop returns a new frame and leaves the caller untouched unless
# inplace=True is passed, so the result has to be assigned back. Without the
# assignment 'col' is still present in the printed frame.
df = df.drop('col', axis=1)
print(df)

# See also:
# * https://stackoverflow.com/questions/14745022/how-to-split-a-column-into-two-columns
#   -> https://stackoverflow.com/a/49955802/6305733