{ "cells": [
 { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:41.902299Z", "start_time": "2019-12-24T17:41:40.774410Z" } }, "outputs": [], "source": [
  "import pandas as pd\n",
  "pd.options.display.max_colwidth = 500"
 ] },
 { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:41.913664Z", "start_time": "2019-12-24T17:41:41.904256Z" } }, "outputs": [], "source": [
  "# Newer pandas releases raise ValueError when a newline is passed as `sep`,\n",
  "# so the one-column file is read line by line instead of through read_csv.\n",
  "with open('examples/test.csv', encoding='utf-8') as src:\n",
  "    lines = [line.rstrip('\\n') for line in src]\n",
  "\n",
  "# Skip the header line (what skiprows=[0] did) and drop blank lines.\n",
  "# raw_df keeps the untouched text so the cells below can be re-run safely.\n",
  "raw_df = pd.DataFrame({'Description': [line for line in lines[1:] if line.strip()]})\n",
  "\n",
  "# Number rows from 1 using the actual row count instead of a hard-coded 21.\n",
  "raw_df.index = range(1, len(raw_df) + 1)"
 ] },
 { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:42.060477Z", "start_time": "2019-12-24T17:41:41.915659Z" } }, "outputs": [], "source": [
  "def f1(x):\n",
  "    \"\"\"Return the text of ``x`` before its first space (all of ``x`` if none).\"\"\"\n",
  "    return x.partition(' ')[0]\n",
  "\n",
  "\n",
  "df = raw_df.assign(Argument=raw_df['Description'].map(f1))  # get the first word"
 ] },
 { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:42.218867Z", "start_time": "2019-12-24T17:41:42.066314Z" } }, "outputs": [], "source": [
  "def f2(x):\n",
  "    \"\"\"Return the text of ``x`` after its first space ('' if there is none).\"\"\"\n",
  "    return x.partition(' ')[2]\n",
  "\n",
  "# Derive from raw_df, not df, so re-running this cell cannot strip a second word.\n",
  "df['Description'] = raw_df['Description'].map(f2)  # remove the first word"
 ] },
 { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:42.377774Z", "start_time": "2019-12-24T17:41:42.225841Z" } }, "outputs": [], "source": [
  "df = df.reindex(columns=['Argument', 'Description'])"
 ] },
 { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:42.589564Z", "start_time": "2019-12-24T17:41:42.381000Z" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | Argument | \n", "Description | \n", "
|---|---|---|
| 1 | \n", "path | \n", "String indicating filesystem location, URL, or file-like object | \n", "
| 2 | \n", "sep | \n", "Character sequence or regular expression to use to split fields in each row | \n", "
| 3 | \n", "header | \n", "Row number to use as column names; defaults to 0 (first row), but should be None if there is no header row | \n", "
| 4 | \n", "index_col | \n", "Column numbers or names to use as the row index in the result; can be a single name/number or a list of them for a hierarchical index | \n", "
| 5 | \n", "names | \n", "List of column names for result, combine with header=None | \n", "
| 6 | \n", "skiprows | \n", "Number of rows at beginning of file to ignore or list of row numbers (starting from 0) to skip. | \n", "
| 7 | \n", "na_values | \n", "Sequence of values to replace with NA. | \n", "
| 8 | \n", "comment | \n", "Character(s) to split comments off the end of lines. | \n", "
| 9 | \n", "parse_dates | \n", "Attempt to parse data to datetime; False by default. If True, will attempt to parse all columns. Otherwise can specify a list of column numbers or name to parse. If element of list is tuple or list, will combine multiple columns together and parse to date (e.g., if date/time split across two columns). | \n", "
| 10 | \n", "keep_date_col | \n", "If joining columns to parse date, keep the joined columns; False by default. | \n", "
| 11 | \n", "converters | \n", "Dict containing column number of name mapping to functions (e.g., {'foo': f} would apply the function f to all values in the 'foo' column). | \n", "
| 12 | \n", "dayfirst | \n", "When parsing potentially ambiguous dates, treat as international format (e.g., 7/6/2012 -> June 7, 2012); False by default. | \n", "
| 13 | \n", "date_parser | \n", "Function to use to parse dates. | \n", "
| 14 | \n", "nrows | \n", "Number of rows to read from beginning of file. | \n", "
| 15 | \n", "iterator | \n", "Return a TextParser object for reading file piecemeal. | \n", "
| 16 | \n", "chunksize | \n", "For iteration, size of file chunks. | \n", "
| 17 | \n", "skip_footer | \n", "Number of lines to ignore at end of file. | \n", "
| 18 | \n", "verbose | \n", "Print various parser output information, like the number of missing values placed in non-numeric columns. | \n", "
| 19 | \n", "encoding | \n", "Text encoding for Unicode (e.g., 'utf-8' for UTF-8 encoded text). | \n", "
| 20 | \n", "squeeze | \n", "If the parsed data only contains one column, return a Series. | \n", "
| 21 | \n", "thousands | \n", "Separator for thousands (e.g., ',' or '.'). | \n", "