{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:41.902299Z", "start_time": "2019-12-24T17:41:40.774410Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "pd.options.display.max_colwidth = 500" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:41.913664Z", "start_time": "2019-12-24T17:41:41.904256Z" } }, "outputs": [], "source": [ "df = pd.read_csv(r'examples/test.csv', sep='\\n', skiprows=[0], names=['Description'])\n", "\n", "df.index = list(range(1, 22))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:42.060477Z", "start_time": "2019-12-24T17:41:41.915659Z" } }, "outputs": [], "source": [ "def f1(x):\n", " x = x.split(' ')\n", " return x.pop(0)\n", "\n", "\n", "df['Argument'] = df['Description'].map(f1) # get the first word" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:42.218867Z", "start_time": "2019-12-24T17:41:42.066314Z" } }, "outputs": [], "source": [ "def f2(x):\n", " x = x.split(' ')\n", " return \" \".join(x[1:])\n", "\n", "df['Description'] = df['Description'].map(f2) # remove the first word" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:42.377774Z", "start_time": "2019-12-24T17:41:42.225841Z" } }, "outputs": [], "source": [ "df = df.reindex(columns=['Argument', 'Description'])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2019-12-24T17:41:42.589564Z", "start_time": "2019-12-24T17:41:42.381000Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ArgumentDescription
1pathString indicating filesystem location, URL, or file-like object
2sepCharacter sequence or regular expression to use to split fields in each row
3headerRow number to use as column names; defaults to 0 (first row), but should be None if there is no header row
4index_colColumn numbers or names to use as the row index in the result; can be a single name/number or a list of them for a hierarchical index
5namesList of column names for result, combine with header=None
6skiprowsNumber of rows at beginning of file to ignore or list of row numbers (starting from 0) to skip.
7na_valuesSequence of values to replace with NA.
8commentCharacter(s) to split comments off the end of lines.
9parse_datesAttempt to parse data to datetime; False by default. If True, will attempt to parse all columns. Otherwise can specify a list of column numbers or name to parse. If element of list is tuple or list, will combine multiple columns together and parse to date (e.g., if date/time split across two columns).
10keep_date_colIf joining columns to parse date, keep the joined columns; False by default.
11convertersDict containing column number of name mapping to functions (e.g., {'foo': f} would apply the function f to all values in the 'foo' column).
12dayfirstWhen parsing potentially ambiguous dates, treat as international format (e.g., 7/6/2012 -> June 7, 2012); False by default.
13date_parserFunction to use to parse dates.
14nrowsNumber of rows to read from beginning of file.
15iteratorReturn a TextParser object for reading file piecemeal.
16chunksizeFor iteration, size of file chunks.
17skip_footerNumber of lines to ignore at end of file.
18verbosePrint various parser output information, like the number of missing values placed in non-numeric columns.
19encodingText encoding for Unicode (e.g., 'utf-8' for UTF-8 encoded text).
20squeezeIf the parsed data only contains one column, return a Series.
21thousandsSeparator for thousands (e.g., ',' or '.').
\n", "
" ], "text/plain": [ " Argument \\\n", "1 path \n", "2 sep \n", "3 header \n", "4 index_col \n", "5 names \n", "6 skiprows \n", "7 na_values \n", "8 comment \n", "9 parse_dates \n", "10 keep_date_col \n", "11 converters \n", "12 dayfirst \n", "13 date_parser \n", "14 nrows \n", "15 iterator \n", "16 chunksize \n", "17 skip_footer \n", "18 verbose \n", "19 encoding \n", "20 squeeze \n", "21 thousands \n", "\n", " Description \n", "1 String indicating filesystem location, URL, or file-like object \n", "2 Character sequence or regular expression to use to split fields in each row \n", "3 Row number to use as column names; defaults to 0 (first row), but should be None if there is no header row \n", "4 Column numbers or names to use as the row index in the result; can be a single name/number or a list of them for a hierarchical index \n", "5 List of column names for result, combine with header=None \n", "6 Number of rows at beginning of file to ignore or list of row numbers (starting from 0) to skip. \n", "7 Sequence of values to replace with NA. \n", "8 Character(s) to split comments off the end of lines. \n", "9 Attempt to parse data to datetime; False by default. If True, will attempt to parse all columns. Otherwise can specify a list of column numbers or name to parse. If element of list is tuple or list, will combine multiple columns together and parse to date (e.g., if date/time split across two columns). \n", "10 If joining columns to parse date, keep the joined columns; False by default. \n", "11 Dict containing column number of name mapping to functions (e.g., {'foo': f} would apply the function f to all values in the 'foo' column). \n", "12 When parsing potentially ambiguous dates, treat as international format (e.g., 7/6/2012 -> June 7, 2012); False by default. \n", "13 Function to use to parse dates. \n", "14 Number of rows to read from beginning of file. \n", "15 Return a TextParser object for reading file piecemeal. \n", "16 For iteration, size of file chunks. \n", "17 Number of lines to ignore at end of file. \n", "18 Print various parser output information, like the number of missing values placed in non-numeric columns. \n", "19 Text encoding for Unicode (e.g., 'utf-8' for UTF-8 encoded text). \n", "20 If the parsed data only contains one column, return a Series. \n", "21 Separator for thousands (e.g., ',' or '.'). 
" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }