{ "metadata": { "name": "", "signature": "sha256:81aed14b5c7bdf9461390525cbd56f1b2754be3bd424493d7fcd7b81d0350bdf" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "https://groups.google.com/forum/#!topic/python-etl/XRIJovpb6Qc\n", "\n", "The conversion I'm trying to make is:\n", "- If nb_day is not integer:\n", " - if notes contains 'am':\n", " - split line:\n", " 1 with full days, modifying date_end = previous date_end - 1\n", " 1 with 0.5 day, modifying date_begin = date_end\n", " - if notes contacts 'pm':\n", " - split line:\n", " 1 with 0.5 day, modifying date_end = date_begin\n", " 1 with full days, modifying date_begin = previous date_begin + 1" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import datetime\n", "import petl.interactive as etl\n", "print etl.__version__" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "0.26\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "data = \"\"\"user_id,date_begin,date_end,nb_days,notes,projet_id\n", "user1,2014-07-31,2014-08-07,5.5,5 days + am,cp\n", "user2,2014-07-31,2014-08-07,5.5,5 days + pm,cp\n", "user3,2014-07-31,2014-08-06,5,5 days,cp\n", "\"\"\"" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "day = datetime.timedelta(days=1)\n", "\n", "\n", "def split_partial_days(row):\n", " if isinstance(row.nb_days, float):\n", " # split out partial days into separate row\n", " if 'am' in row.notes:\n", " # full days\n", " yield (row.user_id, \n", " row.date_begin, \n", " row.date_end - day,\n", " int(row.nb_days),\n", " row.notes.split('+')[0].strip(), \n", " row.projet_id)\n", " # partial days\n", " yield (row.user_id, \n", " row.date_end, \n", " row.date_end, \n", " row.nb_days - int(row.nb_days),\n", " row.notes.split('+')[1].strip(), \n", " row.projet_id)\n", " if 'pm' in row.notes:\n", " # partial days\n", " yield (row.user_id, \n", " row.date_begin, \n", " row.date_begin, \n", " row.nb_days - int(row.nb_days),\n", " row.notes.split('+')[1].strip(), \n", " row.projet_id)\n", " # full days\n", " yield (row.user_id, \n", " row.date_begin + day, \n", " row.date_end,\n", " int(row.nb_days),\n", " row.notes.split('+')[0].strip(), \n", " row.projet_id)\n", " else:\n", " # do nothing\n", " yield row\n", "\n", " \n", "tbl = (etl\n", " .fromcsv(etl.StringSource(data))\n", " .convert(('date_begin', 'date_end'), etl.dateparser('%Y-%m-%d'))\n", " .convert('nb_days', etl.parsenumber)\n", " .rowmapmany(split_partial_days, fields=['user_id', 'date_begin', 'date_end', 'nb_days', 'notes', 'projet_id'])\n", ")\n", "tbl" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "
user_iddate_begindate_endnb_daysnotesprojet_id
user12014-07-312014-08-0655 dayscp
user12014-08-072014-08-070.5amcp
user22014-07-312014-07-310.5pmcp
user22014-08-012014-08-0755 dayscp
user32014-07-312014-08-0655 dayscp
\r\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "+-----------+----------------------------+----------------------------+-----------+----------+-------------+\n", "| 'user_id' | 'date_begin' | 'date_end' | 'nb_days' | 'notes' | 'projet_id' |\n", "+===========+============================+============================+===========+==========+=============+\n", "| 'user1' | datetime.date(2014, 7, 31) | datetime.date(2014, 8, 6) | 5 | '5 days' | 'cp' |\n", "+-----------+----------------------------+----------------------------+-----------+----------+-------------+\n", "| 'user1' | datetime.date(2014, 8, 7) | datetime.date(2014, 8, 7) | 0.5 | 'am' | 'cp' |\n", "+-----------+----------------------------+----------------------------+-----------+----------+-------------+\n", "| 'user2' | datetime.date(2014, 7, 31) | datetime.date(2014, 7, 31) | 0.5 | 'pm' | 'cp' |\n", "+-----------+----------------------------+----------------------------+-----------+----------+-------------+\n", "| 'user2' | datetime.date(2014, 8, 1) | datetime.date(2014, 8, 7) | 5 | '5 days' | 'cp' |\n", "+-----------+----------------------------+----------------------------+-----------+----------+-------------+\n", "| 'user3' | datetime.date(2014, 7, 31) | datetime.date(2014, 8, 6) | 5 | '5 days' | 'cp' |\n", "+-----------+----------------------------+----------------------------+-----------+----------+-------------+" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 } ], "metadata": {} } ] }