{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: scikit-learn in /opt/conda/envs/Python36/lib/python3.6/site-packages (0.20.3)\n", "Requirement already satisfied: numpy>=1.8.2 in /opt/conda/envs/Python36/lib/python3.6/site-packages (from scikit-learn) (1.15.4)\n", "Requirement already satisfied: scipy>=0.13.3 in /opt/conda/envs/Python36/lib/python3.6/site-packages (from scikit-learn) (1.2.0)\n", "Requirement already up-to-date: pixiedust in /opt/conda/envs/Python36/lib/python3.6/site-packages (1.1.18)\n", "Requirement already satisfied, skipping upgrade: markdown in /opt/conda/envs/Python36/lib/python3.6/site-packages (from pixiedust) (3.0.1)\n", "Requirement already satisfied, skipping upgrade: requests in /opt/conda/envs/Python36/lib/python3.6/site-packages (from pixiedust) (2.21.0)\n", "Requirement already satisfied, skipping upgrade: colour in /opt/conda/envs/Python36/lib/python3.6/site-packages (from pixiedust) (0.1.5)\n", "Requirement already satisfied, skipping upgrade: mpld3 in /opt/conda/envs/Python36/lib/python3.6/site-packages (from pixiedust) (0.3)\n", "Requirement already satisfied, skipping upgrade: astunparse in /opt/conda/envs/Python36/lib/python3.6/site-packages (from pixiedust) (1.6.2)\n", "Requirement already satisfied, skipping upgrade: lxml in /opt/conda/envs/Python36/lib/python3.6/site-packages (from pixiedust) (4.3.1)\n", "Requirement already satisfied, skipping upgrade: geojson in /opt/conda/envs/Python36/lib/python3.6/site-packages (from pixiedust) (2.4.1)\n", "Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /opt/conda/envs/Python36/lib/python3.6/site-packages (from requests->pixiedust) (3.0.4)\n", "Requirement already satisfied, skipping upgrade: urllib3<1.25,>=1.21.1 in /opt/conda/envs/Python36/lib/python3.6/site-packages (from requests->pixiedust) (1.24.1)\n", "Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /opt/conda/envs/Python36/lib/python3.6/site-packages (from requests->pixiedust) (2.8)\n", "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /opt/conda/envs/Python36/lib/python3.6/site-packages (from requests->pixiedust) (2019.11.28)\n", "Requirement already satisfied, skipping upgrade: wheel<1.0,>=0.23.0 in /opt/conda/envs/Python36/lib/python3.6/site-packages (from astunparse->pixiedust) (0.32.3)\n", "Requirement already satisfied, skipping upgrade: six<2.0,>=1.6.1 in /opt/conda/envs/Python36/lib/python3.6/site-packages (from astunparse->pixiedust) (1.12.0)\n" ] } ], "source": [ "!pip install scikit-learn\n", "!pip install --upgrade pixiedust" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pixiedust database opened successfully\n" ] }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " \n", " Pixiedust version 1.1.18\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pixiedust\n", "import sklearn\n", "import pandas as pd\n", "from sklearn.ensemble import RandomForestClassifier\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from scipy.io import arff\n", "import brunel\n", "from watson_machine_learning_client import WatsonMachineLearningAPIClient" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading 'https://raw.githubusercontent.com/apischdo/skillsacademy/master/Denormalized%20claims%20data.csv' from https://raw.githubusercontent.com/apischdo/skillsacademy/master/Denormalized%20claims%20data.csv\n", "Downloaded 463947 bytes\n", "Creating pandas DataFrame for 'https://raw.githubusercontent.com/apischdo/skillsacademy/master/Denormalized%20claims%20data.csv'. Please wait...\n", "Loading file using 'pandas'\n", "Successfully created pandas DataFrame for 'https://raw.githubusercontent.com/apischdo/skillsacademy/master/Denormalized%20claims%20data.csv'\n" ] } ], "source": [ "raw_df = pixiedust.sampleData('https://raw.githubusercontent.com/apischdo/skillsacademy/master/Denormalized%20claims%20data.csv')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "pixiedust": { "displayParams": { "aggregation": "COUNT", "handlerId": "scatterPlot", "keyFields": "FLAG_FOR_FRAUD_INV", "rendererId": "matplotlib", "rowCount": "1000", "valueFields": "CLAIM_AMOUNT" } } }, "outputs": [ { "data": { "text/html": [ "
Hey, there's something awesome here! To see it, open this notebook outside GitHub, in a viewer like Jupyter
\n", "
\n", " \n", "
\n", "
\n", " \n", " \n", "
\n", " \n", " \n", " \n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(raw_df)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "/*\n", " * Copyright (c) 2015 IBM Corporation and others.\n", " *\n", " * Licensed under the Apache License, Version 2.0 (the \"License\");\n", " * You may not use this file except in compliance with the License.\n", " * You may obtain a copy of the License at\n", " *\n", " * http://www.apache.org/licenses/LICENSE-2.0\n", " *\n", " * Unless required by applicable law or agreed to in writing, software\n", " * distributed under the License is distributed on an \"AS IS\" BASIS,\n", " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", " * See the License for the specific language governing permissions and\n", " * limitations under the License.\n", " */\n", "\n", "require.config({\n", " waitSeconds: 60,\n", " paths: {\n", " 'd3': '//cdnjs.cloudflare.com/ajax/libs/d3/4.2.1/d3.min',\n", " 'topojson': '//cdnjs.cloudflare.com/ajax/libs/topojson/1.6.20/topojson.min',\n", " 'brunel' : '/data/jupyter2/static-file-content-delivery-network/nbextensions/brunel_ext/brunel.2.3.min',\n", " 'brunelControls' : '/data/jupyter2/static-file-content-delivery-network/nbextensions/brunel_ext/brunel.controls.2.3.min'\n", " },\n", " shim: {\n", " 'brunel' : {\n", " exports: 'BrunelD3',\n", " deps: ['d3', 'topojson'],\n", " init: function() {\n", " return {\n", " BrunelD3 : BrunelD3,\n", " BrunelData : BrunelData\n", " }\n", " }\n", " },\n", " 'brunelControls' : {\n", " exports: 'BrunelEventHandlers',\n", " init: function() {\n", " return {\n", " BrunelEventHandlers: BrunelEventHandlers,\n", " BrunelJQueryControlFactory: BrunelJQueryControlFactory\n", " }\n", " }\n", " }\n", "\n", " }\n", "\n", "});\n", "\n", "require([\"d3\"], function(d3) {\n", " require([\"brunel\", \"brunelControls\"], function(brunel, brunelControls) {\n", " function BrunelVis(visId) {\n", " \"use strict\"; // strict mode\n", " var datasets = [], // array of datasets for the original data\n", " pre = function(d, i) { return d }, // default pre-process does nothing\n", " post = function(d, i) { return d }, // default post-process does nothing\n", " transitionTime = 200, // transition time for animations\n", " charts = [], // the charts in the system\n", " vis = d3.select('#' + visId).attr('class', 'brunel'); // the SVG container\n", "\n", " BrunelD3.addDefinitions(vis); // ensure standard symbols present\n", "\n", " // Define chart #1 in the visualization //////////////////////////////////////////////////////////\n", "\n", " charts[0] = function(parentNode, filterRows) {\n", " var geom = BrunelD3.geometry(parentNode || vis.node(), 0, 0, 1, 1, 5, 60, 37, 13),\n", " elements = []; // array of elements in this chart\n", "\n", " // Define groups for the chart parts ///////////////////////////////////////////////////////////\n", "\n", " var chart = vis.append('g').attr('class', 'chart1')\n", " .attr('transform','translate(' + geom.chart_left + ',' + geom.chart_top + ')');\n", " var overlay = chart.append('g').attr('class', 'element').attr('class', 'overlay');\n", " var zoom = d3.zoom().scaleExtent([1/3,3]);\n", " var zoomNode = overlay.append('rect').attr('class', 'overlay')\n", " .attr('x', geom.inner_left).attr('y', geom.inner_top)\n", " .attr('width', geom.inner_rawWidth).attr('height', geom.inner_rawHeight)\n", " .style('cursor', 'move').call(zoom)\n", " .node();\n", " zoomNode.__zoom = d3.zoomIdentity;\n", " chart.append('rect').attr('class', 'background').attr('width', geom.chart_right-geom.chart_left).attr('height', geom.chart_bottom-geom.chart_top);\n", " var interior = chart.append('g').attr('class', 'interior zoomNone')\n", " .attr('transform','translate(' + geom.inner_left + ',' + geom.inner_top + ')')\n", " .attr('clip-path', 'url(#clip_visidddb1ba14-535d-11ea-a9ba-634709152970_chart1_inner)');\n", " interior.append('rect').attr('class', 'inner').attr('width', geom.inner_width).attr('height', geom.inner_height);\n", " var gridGroup = interior.append('g').attr('class', 'grid');\n", " var axes = chart.append('g').attr('class', 'axis')\n", " .attr('transform','translate(' + geom.inner_left + ',' + geom.inner_top + ')');\n", " vis.append('clipPath').attr('id', 'clip_visidddb1ba14-535d-11ea-a9ba-634709152970_chart1_inner').append('rect')\n", " .attr('x', 0).attr('y', 0)\n", " .attr('width', geom.inner_rawWidth+1).attr('height', geom.inner_rawHeight+1);\n", "\n", " // Scales //////////////////////////////////////////////////////////////////////////////////////\n", "\n", " var scale_x = d3.scaleLinear().domain([-0.2, 1.2000001])\n", " .range([0, geom.inner_width]);\n", " var scale_inner = d3.scaleLinear().domain([0,1])\n", " .range([-0.5, 0.5]);\n", " var scale_y = d3.scaleLinear().domain([0, 50000.005])\n", " .range([geom.inner_height, 0]);\n", " var base_scales = [scale_x, scale_y]; // untransformed original scales\n", "\n", " // Axes ////////////////////////////////////////////////////////////////////////////////////////\n", "\n", " axes.append('g').attr('class', 'x axis')\n", " .attr('transform','translate(0,' + geom.inner_rawHeight + ')')\n", " .attr('clip-path', 'url(#clip_visidddb1ba14-535d-11ea-a9ba-634709152970_chart1_haxis)');\n", " vis.append('clipPath').attr('id', 'clip_visidddb1ba14-535d-11ea-a9ba-634709152970_chart1_haxis').append('polyline')\n", " .attr('points', '-1,-1000, -1,-1 -5,5, -1000,5, -100,1000, 10000,1000 10000,-1000');\n", " axes.select('g.axis.x').append('text').attr('class', 'title').text('FLAG FOR FRAUD INV').style('text-anchor', 'middle')\n", " .attr('x',geom.inner_rawWidth/2)\n", " .attr('y', geom.inner_bottom - 2.0).attr('dy','-0.27em');\n", " axes.append('g').attr('class', 'y axis')\n", " .attr('clip-path', 'url(#clip_visidddb1ba14-535d-11ea-a9ba-634709152970_chart1_vaxis)');\n", " vis.append('clipPath').attr('id', 'clip_visidddb1ba14-535d-11ea-a9ba-634709152970_chart1_vaxis').append('polyline')\n", " .attr('points', '-1000,-10000, 10000,-10000, 10000,' + (geom.inner_rawHeight+1) + ', -1,' + (geom.inner_rawHeight+1) + ', -1,' + (geom.inner_rawHeight+5) + ', -1000,' + (geom.inner_rawHeight+5) );\n", " axes.select('g.axis.y').append('text').attr('class', 'title').text('CLAIM AMOUNT').style('text-anchor', 'middle')\n", " .attr('x',-geom.inner_rawHeight/2)\n", " .attr('y', 4-geom.inner_left).attr('dy', '0.7em').attr('transform', 'rotate(270)');\n", "\n", " var axis_bottom = d3.axisBottom(scale_x).ticks(Math.min(10, Math.round(geom.inner_width / 33.0)));\n", " var axis_left = d3.axisLeft(scale_y).ticks(Math.min(10, Math.round(geom.inner_width / 20)));\n", "\n", " function buildAxes(time) {\n", " var axis_x = axes.select('g.axis.x');\n", " BrunelD3.transition(axis_x, time).call(axis_bottom.scale(scale_x));\n", " var axis_y = axes.select('g.axis.y');\n", " BrunelD3.transition(axis_y, time).call(axis_left.scale(scale_y));\n", " }\n", " zoom.on('zoom', function(t, time) {\n", " t = t ||BrunelD3.restrictZoom(d3.event.transform, geom, this);\n", " scale_x = t.rescaleX(base_scales[0]);\n", " scale_y = t.rescaleY(base_scales[1]);\n", " zoomNode.__zoom = t;\n", " interior.attr('class', 'interior ' + BrunelD3.zoomLabel(t.k));;\n", " build(time || -1);\n", " });\n", "\n", " // Define element #1 ///////////////////////////////////////////////////////////////////////////\n", "\n", " elements[0] = function() {\n", " var original, processed, // data sets passed in and then transformed\n", " element, data, // brunel element information and brunel data\n", " selection, merged; // d3 selection and merged selection\n", " var elementGroup = interior.append('g').attr('class', 'element1'),\n", " main = elementGroup.append('g').attr('class', 'main'),\n", " labels = BrunelD3.undoTransform(elementGroup.append('g').attr('class', 'labels').attr('aria-hidden', 'true'), elementGroup);\n", "\n", " function makeData() {\n", " original = datasets[0];\n", " if (filterRows) original = original.retainRows(filterRows);\n", " processed = pre(original, 0);\n", " processed = post(processed, 0);\n", " var f0 = processed.field('FLAG_FOR_FRAUD_INV'),\n", " f1 = processed.field('CLAIM_AMOUNT'),\n", " f2 = processed.field('#row'),\n", " f3 = processed.field('#selection');\n", " var keyFunc = function(d) { return f2.value(d) };\n", " data = {\n", " FLAG_FOR_FRAUD_INV:function(d) { return f0.value(d.row) },\n", " CLAIM_AMOUNT: function(d) { return f1.value(d.row) },\n", " $row: function(d) { return f2.value(d.row) },\n", " $selection: function(d) { return f3.value(d.row) },\n", " FLAG_FOR_FRAUD_INV_f:function(d) { return f0.valueFormatted(d.row) },\n", " CLAIM_AMOUNT_f:function(d) { return f1.valueFormatted(d.row) },\n", " $row_f: function(d) { return f2.valueFormatted(d.row) },\n", " $selection_f: function(d) { return f3.valueFormatted(d.row) },\n", " _split: function(d) { return 'ALL' },\n", " _key: keyFunc,\n", " _rows: BrunelD3.makeRowsWithKeys(keyFunc, processed.rowCount())\n", " };\n", " }\n", "\n", " // Build element from data ///////////////////////////////////////////////////////////////////\n", "\n", " function build(transitionMillis) {\n", " element = elements[0];\n", " var w = Math.abs( scale_x(scale_x.domain()[0] + 1.0) - scale_x.range()[0] );\n", " var x = function(d) { return scale_x(data.FLAG_FOR_FRAUD_INV(d))};\n", " var h = geom.default_point_size;\n", " var y = function(d) { return scale_y(data.CLAIM_AMOUNT(d))};\n", "\n", " // Define selection entry operations\n", " function initialState(selection) {\n", " selection\n", " .attr('class', 'element point filled')\n", " .style('pointer-events', 'none')\n", " }\n", "\n", " // Define selection update operations on merged data\n", " function updateState(selection) {\n", " selection\n", " .attr('cx',function(d) { return scale_x(data.FLAG_FOR_FRAUD_INV(d))})\n", " .attr('cy',function(d) { return scale_y(data.CLAIM_AMOUNT(d))})\n", " .attr('r',Math.min(Math.abs( scale_x(scale_x.domain()[0] + 1.0) - scale_x.range()[0] ), geom.default_point_size) / 2);\n", " }\n", "\n", " // Define labeling for the selection\n", " function label(selection, transitionMillis) {\n", " }\n", " // Create selections, set the initial state and transition updates\n", " selection = main.selectAll('.element').data(data._rows, function(d) { return d.key });\n", " var added = selection.enter().append('circle');\n", " merged = selection.merge(added);\n", " initialState(added);\n", " selection.filter(BrunelD3.hasData)\n", " .classed('selected', BrunelD3.isSelected(data))\n", " .filter(BrunelD3.isSelected(data)).raise();\n", " updateState(BrunelD3.transition(merged, transitionMillis));\n", "\n", " BrunelD3.transition(selection.exit(), transitionMillis/3)\n", " .style('opacity', 0.5).each( function() {\n", " this.remove(); BrunelD3.removeLabels(this); \n", " });\n", " }\n", "\n", " return {\n", " data: function() { return processed },\n", " original: function() { return original },\n", " internal: function() { return data },\n", " selection: function() { return merged },\n", " makeData: makeData,\n", " build: build,\n", " chart: function() { return charts[0] },\n", " group: function() { return elementGroup },\n", " fields: {\n", " x: ['FLAG_FOR_FRAUD_INV'],\n", " y: ['CLAIM_AMOUNT'],\n", " key: ['#row']\n", " }\n", " };\n", " }();\n", "\n", " function build(time, noData) {\n", " var first = elements[0].data() == null;\n", " if (first) time = 0; // no transition for first call\n", " buildAxes(time);\n", " if ((first || time > -1) && !noData) {\n", " elements[0].makeData();\n", " }\n", " elements[0].build(time);\n", " }\n", "\n", " // Expose the following components of the chart\n", " return {\n", " elements : elements,\n", " interior : interior,\n", " scales: {x:scale_x, y:scale_y},\n", " zoom: function(params, time) {\n", " if (params) zoom.on('zoom').call(zoomNode, params, time);\n", " return d3.zoomTransform(zoomNode);\n", " },\n", " build : build\n", " };\n", " }();\n", "\n", " function setData(rowData, i) { datasets[i||0] = BrunelD3.makeData(rowData) }\n", " function updateAll(time) { charts.forEach(function(x) {x.build(time || 0)}) }\n", " function buildAll() {\n", " for (var i=0;i" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%brunel data(\"raw_df\") x(FLAG_FOR_FRAUD_INV) y(CLAIM_AMOUNT)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "rel_cols = ['HOUSEHOLD_ID','DRIVER_ID','POLICY_ID','CLAIM_ID','LOSS_EVENT_TIME','CLAIM_INIT_TIME', 'ODOMETER_AT_LOSS','CLAIMS_AT_LOSS_DATE','LOSS_LOCATION_LAT','LOSS_LOCATION_LONG','CLAIM_AMOUNT','FLAG_FOR_FRAUD_INV']" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "df_claim = raw_df[rel_cols]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HOUSEHOLD_IDDRIVER_IDPOLICY_IDCLAIM_IDLOSS_EVENT_TIMECLAIM_INIT_TIMEODOMETER_AT_LOSSCLAIMS_AT_LOSS_DATELOSS_LOCATION_LATLOSS_LOCATION_LONGCLAIM_AMOUNTFLAG_FOR_FRAUD_INV
0CH42335XZJ2837NW5567882A-2017-UU9072017-04-25 00:00:00.02017-04-28 00:00:00.0157654.9141.902103-87.75562435765.01
1IH49805VVR6423UR4864804A-2018-FI4812018-08-26 00:00:00.02018-08-31 00:00:00.0226154.5141.963562-87.7313971909.00
2AF28736UQM2512RR8595908A-2016-ZG6942016-01-07 00:00:00.02016-01-11 00:00:00.083968.6241.736602-87.60496825730.01
3EF53594YDT5591RN5640634A-2016-NG7832016-12-11 00:00:00.02016-12-18 00:00:00.0309570.3141.909925-87.73155740880.01
4LD32277ONM5465YY1229530A-2017-ZO8632017-06-06 00:00:00.02017-06-08 00:00:00.0136633.9141.923750-87.7898812130.00
5DM94074GBU7751XP3473763A-2018-XB4322018-03-06 00:00:00.02018-03-19 00:00:00.0326514.1141.909257-87.7850571970.00
6MD38210CBR4335US5444269A-2017-XP7582017-08-13 00:00:00.02017-08-18 00:00:00.058477.9141.928395-87.7964682290.00
7GL77908HZF3884XR1994270A-2017-QY9462017-02-10 00:00:00.02017-02-18 00:00:00.0176476.9141.885716-87.7283762990.00
8BA26199CSE9523VP6368585A-2018-LB8182018-02-16 00:00:00.02018-02-24 00:00:00.0277812.7141.895054-87.7456621170.00
9EA38976HFX7408YP9758006A-2016-QD3382016-12-28 00:00:00.02017-01-11 00:00:00.0190541.3141.891976-87.6145802090.00
10GB64343PXE3728XY6800348A-2017-CK7102017-08-12 00:00:00.02017-08-25 00:00:00.0290975.1141.911223-87.6386562130.00
11JG99629OKH5337ZK6994471A-2018-WF1142018-07-30 00:00:00.02018-07-30 00:00:00.0159873.2541.735835-87.66786633040.01
12FH81231RQZ1566ZR4462879A-2018-VT4702018-02-22 00:00:00.02018-02-26 00:00:00.0391866.9141.734164-87.5512112400.00
13KI98597VRX5780ZW4263453A-2017-SA2792017-02-06 00:00:00.02017-02-06 00:00:00.0384000.6341.791852-87.80137821330.01
14CG61685TIA1702UW8176531A-2018-EJ3682018-07-10 00:00:00.02018-07-18 00:00:00.0251644.8142.005931-87.6806603427.00
15AB12181OBD8151RW8652538A-2017-CT5872017-07-29 00:00:00.02017-08-10 00:00:00.0300279.3141.693401-87.61232235385.51
16BJ63668WDC2749PQ3189850A-2017-CU8172017-12-30 00:00:00.02018-01-03 00:00:00.073419.5141.840145-87.6613391570.00
17MG83193XJB5149XN9015815A-2018-LF9722018-01-16 00:00:00.02018-01-28 00:00:00.0118923.9141.975175-87.7683442370.00
18IJ01709CRP8660SV1659058A-2017-ML6772017-04-25 00:00:00.02017-05-07 00:00:00.0182681.3141.890173-87.64999226250.01
19BC94182PEQ6155UQ8163328A-2018-WZ4922018-02-10 00:00:00.02018-02-19 00:00:00.0160429.9341.840658-87.72589925830.01
20MF96553PPI8400OT0287150A-2018-DI4512018-01-23 00:00:00.02018-01-24 00:00:00.0165363.7441.884705-87.66704623483.01
21BC90853JOA9170TW2172802A-2016-RW6532016-12-01 00:00:00.02016-12-10 00:00:00.0240639.2141.736183-87.6436602173.50
22AK42388KNQ4268PP7360289A-2017-KO8122017-07-12 00:00:00.02017-07-12 00:00:00.0388591.3141.762030-87.6116961750.00
23HB16909SRU2950OY6516260A-2016-CA7402016-01-06 00:00:00.02016-01-08 00:00:00.059498.7142.006422-87.67812020310.01
24FA97534EHL9301OS6708536A-2018-PV5772018-10-13 00:00:00.02018-10-25 00:00:00.094043.6141.813602-87.7043382810.00
25CF57572CJY6373QV4842191A-2017-JI5712017-07-19 00:00:00.02017-07-19 00:00:00.069305.4141.996924-87.8069452370.00
26ME37855NCL2868VT9807337A-2018-CK2152018-11-04 00:00:00.02018-11-13 00:00:00.0207764.4141.952228-87.7274251320.00
27LA79549YWA1319PU5439335A-2017-FJ2172017-05-31 00:00:00.02017-06-12 00:00:00.0302710.0541.976895-87.69228030840.01
28II25056CVI8835OJ5466096A-2017-YI1332017-03-01 00:00:00.02017-03-13 00:00:00.0109080.0341.880781-87.7302321230.50
29CK11297HIR9014RR5447110A-2018-QW1322018-09-05 00:00:00.02018-09-11 00:00:00.069640.2141.892753-87.7607091020.00
.......................................
945FH69052GSN4940VR3029092A-2018-IZ9382018-08-01 00:00:00.02018-08-09 00:00:00.0588158.9141.997861-87.6886051160.00
946LD06091EGF8629TQ4763765A-2016-DR8892016-09-12 00:00:00.02016-09-12 00:00:00.0220827.8141.722449-87.58500420550.01
947HA14850QAO5663XV0936944A-2018-XV5352018-09-29 00:00:00.02018-10-02 00:00:00.0146983.7241.837622-87.6658872620.00
948LJ86931YXF8421XO1420265A-2017-SS6682017-07-02 00:00:00.02017-07-11 00:00:00.0149332.0141.810689-87.7110952460.00
949AG86132VDU5872QP0855306A-2018-CY4952018-09-23 00:00:00.02018-09-23 00:00:00.062842.7241.922121-87.6302431180.00
950GE81740HFZ7313QW3622319A-2016-BI4072016-11-11 00:00:00.02016-11-18 00:00:00.0214021.1141.753357-87.7026432080.00
951LK14209JDC8664US7928033A-2016-NI8142016-01-11 00:00:00.02016-01-18 00:00:00.0172955.9141.866254-87.66652413820.01
952EI71996QBV9830YU2221856A-2018-CU5292018-06-11 00:00:00.02018-06-23 00:00:00.0293880.1141.858883-87.69336329600.01
953IF40191LFW8264YY7862368A-2017-YB1902017-03-08 00:00:00.02017-03-18 00:00:00.0118784.9241.911261-87.6790551000.00
954EH17979EUT2904SP1179929A-2017-RM6212017-07-12 00:00:00.02017-07-20 00:00:00.0227731.0141.857835-87.66628737200.01
955HC35643PBW7429RZ2794532A-2018-KQ1192018-10-14 00:00:00.02018-10-22 00:00:00.0157867.5341.968902-87.66053223980.01
956JF75578SGO7528SV1869698A-2016-EC3202016-05-25 00:00:00.02016-06-01 00:00:00.0454638.7441.708182-87.5665351587.00
957DA02226IRJ6787VP1026284A-2017-EW1722017-09-25 00:00:00.02017-09-28 00:00:00.0237136.2541.823685-87.6217472530.01
958AK18614BKZ8467ZT7214353A-2018-CL2162018-01-10 00:00:00.02018-01-15 00:00:00.0325206.2141.754660-87.74138528050.01
959HI45009SNZ2467PZ8927119A-2018-DS7752018-08-28 00:00:00.02018-08-30 00:00:00.0112831.5141.746195-87.5513081350.00
960BB70679PDF9673ZQ9923600A-2018-UG8472018-01-21 00:00:00.02018-01-29 00:00:00.0137111.1241.886752-87.67124713710.01
961HG42702TYL6617OQ2049136A-2017-KO8412017-03-05 00:00:00.02017-03-06 00:00:00.0172496.5141.765291-87.60454432960.01
962KK79654LIK6490VO4861035A-2017-OB9452017-10-18 00:00:00.02017-11-01 00:00:00.0253215.2141.675067-87.6383552120.00
963FC64633EXS9829NU1726936A-2017-LI1762017-05-22 00:00:00.02017-06-02 00:00:00.0342907.1141.925275-87.6671241430.00
964FG85355SAC4402YN8255193A-2016-DP8562016-02-06 00:00:00.02016-02-10 00:00:00.0120261.6141.976099-87.7109761253.50
965EL47810FND5049NY8366901A-2018-ZE5222018-04-03 00:00:00.02018-04-17 00:00:00.053558.5541.957653-87.7058372730.01
966FF77811FLR5428PZ8862368A-2017-TD3852017-11-14 00:00:00.02017-11-24 00:00:00.0156352.7141.775257-87.6406032250.00
967JC25299CDZ4102SS8155048A-2018-FD5172018-03-11 00:00:00.02018-03-15 00:00:00.063924.0141.997355-87.7656142070.00
968LI38945HTQ6817NS6540979A-2018-UH1532018-08-24 00:00:00.02018-09-03 00:00:00.0105133.8141.946016-87.76444233210.01
969EE18899PUN8916NY8220476A-2017-BI6142017-06-13 00:00:00.02017-06-25 00:00:00.0299557.9141.867125-87.6470652440.00
970CM11612GJI9724QY3512250A-2016-TJ2102016-11-29 00:00:00.02016-12-01 00:00:00.077438.7141.876984-87.74282444666.01
971HM55275ORD4650XR9202123A-2016-EU6782016-08-14 00:00:00.02016-08-21 00:00:00.0219285.9142.002596-87.6609322320.00
972BA79937BLV8655TU0769843A-2016-MT9712016-08-22 00:00:00.02016-09-03 00:00:00.0192320.1141.778640-87.72261339307.01
973AL29368XBH9030UU9571126A-2017-ZX3882017-01-24 00:00:00.02017-02-04 00:00:00.0273023.9141.770738-87.7227071720.00
974CK27900BZQ1079SV2796501A-2016-GP4772016-07-05 00:00:00.02016-07-09 00:00:00.0178004.2141.768244-87.6198561050.00
\n", "

975 rows × 12 columns

\n", "
" ], "text/plain": [ " HOUSEHOLD_ID DRIVER_ID POLICY_ID CLAIM_ID LOSS_EVENT_TIME \\\n", "0 CH42335 XZJ2837 NW5567882 A-2017-UU907 2017-04-25 00:00:00.0 \n", "1 IH49805 VVR6423 UR4864804 A-2018-FI481 2018-08-26 00:00:00.0 \n", "2 AF28736 UQM2512 RR8595908 A-2016-ZG694 2016-01-07 00:00:00.0 \n", "3 EF53594 YDT5591 RN5640634 A-2016-NG783 2016-12-11 00:00:00.0 \n", "4 LD32277 ONM5465 YY1229530 A-2017-ZO863 2017-06-06 00:00:00.0 \n", "5 DM94074 GBU7751 XP3473763 A-2018-XB432 2018-03-06 00:00:00.0 \n", "6 MD38210 CBR4335 US5444269 A-2017-XP758 2017-08-13 00:00:00.0 \n", "7 GL77908 HZF3884 XR1994270 A-2017-QY946 2017-02-10 00:00:00.0 \n", "8 BA26199 CSE9523 VP6368585 A-2018-LB818 2018-02-16 00:00:00.0 \n", "9 EA38976 HFX7408 YP9758006 A-2016-QD338 2016-12-28 00:00:00.0 \n", "10 GB64343 PXE3728 XY6800348 A-2017-CK710 2017-08-12 00:00:00.0 \n", "11 JG99629 OKH5337 ZK6994471 A-2018-WF114 2018-07-30 00:00:00.0 \n", "12 FH81231 RQZ1566 ZR4462879 A-2018-VT470 2018-02-22 00:00:00.0 \n", "13 KI98597 VRX5780 ZW4263453 A-2017-SA279 2017-02-06 00:00:00.0 \n", "14 CG61685 TIA1702 UW8176531 A-2018-EJ368 2018-07-10 00:00:00.0 \n", "15 AB12181 OBD8151 RW8652538 A-2017-CT587 2017-07-29 00:00:00.0 \n", "16 BJ63668 WDC2749 PQ3189850 A-2017-CU817 2017-12-30 00:00:00.0 \n", "17 MG83193 XJB5149 XN9015815 A-2018-LF972 2018-01-16 00:00:00.0 \n", "18 IJ01709 CRP8660 SV1659058 A-2017-ML677 2017-04-25 00:00:00.0 \n", "19 BC94182 PEQ6155 UQ8163328 A-2018-WZ492 2018-02-10 00:00:00.0 \n", "20 MF96553 PPI8400 OT0287150 A-2018-DI451 2018-01-23 00:00:00.0 \n", "21 BC90853 JOA9170 TW2172802 A-2016-RW653 2016-12-01 00:00:00.0 \n", "22 AK42388 KNQ4268 PP7360289 A-2017-KO812 2017-07-12 00:00:00.0 \n", "23 HB16909 SRU2950 OY6516260 A-2016-CA740 2016-01-06 00:00:00.0 \n", "24 FA97534 EHL9301 OS6708536 A-2018-PV577 2018-10-13 00:00:00.0 \n", "25 CF57572 CJY6373 QV4842191 A-2017-JI571 2017-07-19 00:00:00.0 \n", "26 ME37855 NCL2868 VT9807337 A-2018-CK215 2018-11-04 00:00:00.0 \n", "27 LA79549 YWA1319 PU5439335 A-2017-FJ217 2017-05-31 00:00:00.0 \n", "28 II25056 CVI8835 OJ5466096 A-2017-YI133 2017-03-01 00:00:00.0 \n", "29 CK11297 HIR9014 RR5447110 A-2018-QW132 2018-09-05 00:00:00.0 \n", ".. ... ... ... ... ... \n", "945 FH69052 GSN4940 VR3029092 A-2018-IZ938 2018-08-01 00:00:00.0 \n", "946 LD06091 EGF8629 TQ4763765 A-2016-DR889 2016-09-12 00:00:00.0 \n", "947 HA14850 QAO5663 XV0936944 A-2018-XV535 2018-09-29 00:00:00.0 \n", "948 LJ86931 YXF8421 XO1420265 A-2017-SS668 2017-07-02 00:00:00.0 \n", "949 AG86132 VDU5872 QP0855306 A-2018-CY495 2018-09-23 00:00:00.0 \n", "950 GE81740 HFZ7313 QW3622319 A-2016-BI407 2016-11-11 00:00:00.0 \n", "951 LK14209 JDC8664 US7928033 A-2016-NI814 2016-01-11 00:00:00.0 \n", "952 EI71996 QBV9830 YU2221856 A-2018-CU529 2018-06-11 00:00:00.0 \n", "953 IF40191 LFW8264 YY7862368 A-2017-YB190 2017-03-08 00:00:00.0 \n", "954 EH17979 EUT2904 SP1179929 A-2017-RM621 2017-07-12 00:00:00.0 \n", "955 HC35643 PBW7429 RZ2794532 A-2018-KQ119 2018-10-14 00:00:00.0 \n", "956 JF75578 SGO7528 SV1869698 A-2016-EC320 2016-05-25 00:00:00.0 \n", "957 DA02226 IRJ6787 VP1026284 A-2017-EW172 2017-09-25 00:00:00.0 \n", "958 AK18614 BKZ8467 ZT7214353 A-2018-CL216 2018-01-10 00:00:00.0 \n", "959 HI45009 SNZ2467 PZ8927119 A-2018-DS775 2018-08-28 00:00:00.0 \n", "960 BB70679 PDF9673 ZQ9923600 A-2018-UG847 2018-01-21 00:00:00.0 \n", "961 HG42702 TYL6617 OQ2049136 A-2017-KO841 2017-03-05 00:00:00.0 \n", "962 KK79654 LIK6490 VO4861035 A-2017-OB945 2017-10-18 00:00:00.0 \n", "963 FC64633 EXS9829 NU1726936 A-2017-LI176 2017-05-22 00:00:00.0 \n", "964 FG85355 SAC4402 YN8255193 A-2016-DP856 2016-02-06 00:00:00.0 \n", "965 EL47810 FND5049 NY8366901 A-2018-ZE522 2018-04-03 00:00:00.0 \n", "966 FF77811 FLR5428 PZ8862368 A-2017-TD385 2017-11-14 00:00:00.0 \n", "967 JC25299 CDZ4102 SS8155048 A-2018-FD517 2018-03-11 00:00:00.0 \n", "968 LI38945 HTQ6817 NS6540979 A-2018-UH153 2018-08-24 00:00:00.0 \n", "969 EE18899 PUN8916 NY8220476 A-2017-BI614 2017-06-13 00:00:00.0 \n", "970 CM11612 GJI9724 QY3512250 A-2016-TJ210 2016-11-29 00:00:00.0 \n", "971 HM55275 ORD4650 XR9202123 A-2016-EU678 2016-08-14 00:00:00.0 \n", "972 BA79937 BLV8655 TU0769843 A-2016-MT971 2016-08-22 00:00:00.0 \n", "973 AL29368 XBH9030 UU9571126 A-2017-ZX388 2017-01-24 00:00:00.0 \n", "974 CK27900 BZQ1079 SV2796501 A-2016-GP477 2016-07-05 00:00:00.0 \n", "\n", " CLAIM_INIT_TIME ODOMETER_AT_LOSS CLAIMS_AT_LOSS_DATE \\\n", "0 2017-04-28 00:00:00.0 157654.9 1 \n", "1 2018-08-31 00:00:00.0 226154.5 1 \n", "2 2016-01-11 00:00:00.0 83968.6 2 \n", "3 2016-12-18 00:00:00.0 309570.3 1 \n", "4 2017-06-08 00:00:00.0 136633.9 1 \n", "5 2018-03-19 00:00:00.0 326514.1 1 \n", "6 2017-08-18 00:00:00.0 58477.9 1 \n", "7 2017-02-18 00:00:00.0 176476.9 1 \n", "8 2018-02-24 00:00:00.0 277812.7 1 \n", "9 2017-01-11 00:00:00.0 190541.3 1 \n", "10 2017-08-25 00:00:00.0 290975.1 1 \n", "11 2018-07-30 00:00:00.0 159873.2 5 \n", "12 2018-02-26 00:00:00.0 391866.9 1 \n", "13 2017-02-06 00:00:00.0 384000.6 3 \n", "14 2018-07-18 00:00:00.0 251644.8 1 \n", "15 2017-08-10 00:00:00.0 300279.3 1 \n", "16 2018-01-03 00:00:00.0 73419.5 1 \n", "17 2018-01-28 00:00:00.0 118923.9 1 \n", "18 2017-05-07 00:00:00.0 182681.3 1 \n", "19 2018-02-19 00:00:00.0 160429.9 3 \n", "20 2018-01-24 00:00:00.0 165363.7 4 \n", "21 2016-12-10 00:00:00.0 240639.2 1 \n", "22 2017-07-12 00:00:00.0 388591.3 1 \n", "23 2016-01-08 00:00:00.0 59498.7 1 \n", "24 2018-10-25 00:00:00.0 94043.6 1 \n", "25 2017-07-19 00:00:00.0 69305.4 1 \n", "26 2018-11-13 00:00:00.0 207764.4 1 \n", "27 2017-06-12 00:00:00.0 302710.0 5 \n", "28 2017-03-13 00:00:00.0 109080.0 3 \n", "29 2018-09-11 00:00:00.0 69640.2 1 \n", ".. ... ... ... \n", "945 2018-08-09 00:00:00.0 588158.9 1 \n", "946 2016-09-12 00:00:00.0 220827.8 1 \n", "947 2018-10-02 00:00:00.0 146983.7 2 \n", "948 2017-07-11 00:00:00.0 149332.0 1 \n", "949 2018-09-23 00:00:00.0 62842.7 2 \n", "950 2016-11-18 00:00:00.0 214021.1 1 \n", "951 2016-01-18 00:00:00.0 172955.9 1 \n", "952 2018-06-23 00:00:00.0 293880.1 1 \n", "953 2017-03-18 00:00:00.0 118784.9 2 \n", "954 2017-07-20 00:00:00.0 227731.0 1 \n", "955 2018-10-22 00:00:00.0 157867.5 3 \n", "956 2016-06-01 00:00:00.0 454638.7 4 \n", "957 2017-09-28 00:00:00.0 237136.2 5 \n", "958 2018-01-15 00:00:00.0 325206.2 1 \n", "959 2018-08-30 00:00:00.0 112831.5 1 \n", "960 2018-01-29 00:00:00.0 137111.1 2 \n", "961 2017-03-06 00:00:00.0 172496.5 1 \n", "962 2017-11-01 00:00:00.0 253215.2 1 \n", "963 2017-06-02 00:00:00.0 342907.1 1 \n", "964 2016-02-10 00:00:00.0 120261.6 1 \n", "965 2018-04-17 00:00:00.0 53558.5 5 \n", "966 2017-11-24 00:00:00.0 156352.7 1 \n", "967 2018-03-15 00:00:00.0 63924.0 1 \n", "968 2018-09-03 00:00:00.0 105133.8 1 \n", "969 2017-06-25 00:00:00.0 299557.9 1 \n", "970 2016-12-01 00:00:00.0 77438.7 1 \n", "971 2016-08-21 00:00:00.0 219285.9 1 \n", "972 2016-09-03 00:00:00.0 192320.1 1 \n", "973 2017-02-04 00:00:00.0 273023.9 1 \n", "974 2016-07-09 00:00:00.0 178004.2 1 \n", "\n", " LOSS_LOCATION_LAT LOSS_LOCATION_LONG CLAIM_AMOUNT FLAG_FOR_FRAUD_INV \n", "0 41.902103 -87.755624 35765.0 1 \n", "1 41.963562 -87.731397 1909.0 0 \n", "2 41.736602 -87.604968 25730.0 1 \n", "3 41.909925 -87.731557 40880.0 1 \n", "4 41.923750 -87.789881 2130.0 0 \n", "5 41.909257 -87.785057 1970.0 0 \n", "6 41.928395 -87.796468 2290.0 0 \n", "7 41.885716 -87.728376 2990.0 0 \n", "8 41.895054 -87.745662 1170.0 0 \n", "9 41.891976 -87.614580 2090.0 0 \n", "10 41.911223 -87.638656 2130.0 0 \n", "11 41.735835 -87.667866 33040.0 1 \n", "12 41.734164 -87.551211 2400.0 0 \n", "13 41.791852 -87.801378 21330.0 1 \n", "14 42.005931 -87.680660 3427.0 0 \n", "15 41.693401 -87.612322 35385.5 1 \n", "16 41.840145 -87.661339 1570.0 0 \n", "17 41.975175 -87.768344 2370.0 0 \n", "18 41.890173 -87.649992 26250.0 1 \n", "19 41.840658 -87.725899 25830.0 1 \n", "20 41.884705 -87.667046 23483.0 1 \n", "21 41.736183 -87.643660 2173.5 0 \n", "22 41.762030 -87.611696 1750.0 0 \n", "23 42.006422 -87.678120 20310.0 1 \n", "24 41.813602 -87.704338 2810.0 0 \n", "25 41.996924 -87.806945 2370.0 0 \n", "26 41.952228 -87.727425 1320.0 0 \n", "27 41.976895 -87.692280 30840.0 1 \n", "28 41.880781 -87.730232 1230.5 0 \n", "29 41.892753 -87.760709 1020.0 0 \n", ".. ... ... ... ... \n", "945 41.997861 -87.688605 1160.0 0 \n", "946 41.722449 -87.585004 20550.0 1 \n", "947 41.837622 -87.665887 2620.0 0 \n", "948 41.810689 -87.711095 2460.0 0 \n", "949 41.922121 -87.630243 1180.0 0 \n", "950 41.753357 -87.702643 2080.0 0 \n", "951 41.866254 -87.666524 13820.0 1 \n", "952 41.858883 -87.693363 29600.0 1 \n", "953 41.911261 -87.679055 1000.0 0 \n", "954 41.857835 -87.666287 37200.0 1 \n", "955 41.968902 -87.660532 23980.0 1 \n", "956 41.708182 -87.566535 1587.0 0 \n", "957 41.823685 -87.621747 2530.0 1 \n", "958 41.754660 -87.741385 28050.0 1 \n", "959 41.746195 -87.551308 1350.0 0 \n", "960 41.886752 -87.671247 13710.0 1 \n", "961 41.765291 -87.604544 32960.0 1 \n", "962 41.675067 -87.638355 2120.0 0 \n", "963 41.925275 -87.667124 1430.0 0 \n", "964 41.976099 -87.710976 1253.5 0 \n", "965 41.957653 -87.705837 2730.0 1 \n", "966 41.775257 -87.640603 2250.0 0 \n", "967 41.997355 -87.765614 2070.0 0 \n", "968 41.946016 -87.764442 33210.0 1 \n", "969 41.867125 -87.647065 2440.0 0 \n", "970 41.876984 -87.742824 44666.0 1 \n", "971 42.002596 -87.660932 2320.0 0 \n", "972 41.778640 -87.722613 39307.0 1 \n", "973 41.770738 -87.722707 1720.0 0 \n", "974 41.768244 -87.619856 1050.0 0 \n", "\n", "[975 rows x 12 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_claim" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "rel_cols_2 = [\"DRIVER_ID\",\"DRIVERS_LICENSE_EXPIRY\",\"DATE_AT_CURRENT_ADDRESS\"]\n", "df_driver = raw_df[rel_cols_2]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DRIVER_IDDRIVERS_LICENSE_EXPIRYDATE_AT_CURRENT_ADDRESS
0XZJ28372018-08-191999-04-16
1VVR64232018-01-272011-06-09
2UQM25122019-11-192005-05-21
3YDT55912019-05-162000-03-11
4ONM54652020-07-062012-07-04
5GBU77512021-09-262001-07-15
6CBR43352019-05-102004-04-08
7HZF38842019-04-261999-02-02
8CSE95232021-04-152005-03-07
9HFX74082019-08-072012-03-04
10PXE37282019-07-022017-05-03
11OKH53372018-03-292008-07-06
12RQZ15662018-04-012012-05-15
13VRX57802019-07-132007-01-14
14TIA17022020-04-252009-09-25
15OBD81512019-04-131999-07-04
16WDC27492020-07-152002-05-21
17XJB51492018-07-312012-07-16
18CRP86602019-09-282004-11-26
19PEQ61552021-07-222007-02-23
20PPI84002021-11-262006-09-18
21JOA91702020-09-091999-05-16
22KNQ42682021-03-102008-02-12
23SRU29502021-01-222006-11-01
24EHL93012019-10-072001-03-12
25CJY63732021-10-252000-01-07
26NCL28682019-05-162005-05-05
27YWA13192019-04-282012-11-27
28CVI88352018-12-082015-08-19
29HIR90142021-11-102014-07-18
............
945GSN49402018-01-032015-07-10
946EGF86292020-04-282016-02-27
947QAO56632018-01-302014-09-02
948YXF84212018-06-162006-09-05
949VDU58722018-01-302007-04-04
950HFZ73132018-10-062014-08-07
951JDC86642018-08-112017-02-04
952QBV98302021-11-192015-06-25
953LFW82642021-03-262006-07-03
954EUT29042021-12-102008-02-23
955PBW74292020-09-082017-05-13
956SGO75282020-03-241999-03-18
957IRJ67872021-11-132013-06-19
958BKZ84672021-11-252004-04-06
959SNZ24672021-02-142003-07-22
960PDF96732020-01-012005-03-27
961TYL66172020-08-212014-04-27
962LIK64902020-03-142010-03-17
963EXS98292021-05-082009-03-10
964SAC44022021-03-062004-05-19
965FND50492018-07-252012-01-07
966FLR54282018-04-032002-09-14
967CDZ41022021-06-032009-02-16
968HTQ68172020-03-172013-07-25
969PUN89162019-11-262013-01-19
970GJI97242018-06-232015-06-21
971ORD46502019-08-162014-05-11
972BLV86552019-12-242014-01-14
973XBH90302020-09-172010-02-11
974BZQ10792020-07-222003-09-12
\n", "

975 rows × 3 columns

\n", "
" ], "text/plain": [ " DRIVER_ID DRIVERS_LICENSE_EXPIRY DATE_AT_CURRENT_ADDRESS\n", "0 XZJ2837 2018-08-19 1999-04-16\n", "1 VVR6423 2018-01-27 2011-06-09\n", "2 UQM2512 2019-11-19 2005-05-21\n", "3 YDT5591 2019-05-16 2000-03-11\n", "4 ONM5465 2020-07-06 2012-07-04\n", "5 GBU7751 2021-09-26 2001-07-15\n", "6 CBR4335 2019-05-10 2004-04-08\n", "7 HZF3884 2019-04-26 1999-02-02\n", "8 CSE9523 2021-04-15 2005-03-07\n", "9 HFX7408 2019-08-07 2012-03-04\n", "10 PXE3728 2019-07-02 2017-05-03\n", "11 OKH5337 2018-03-29 2008-07-06\n", "12 RQZ1566 2018-04-01 2012-05-15\n", "13 VRX5780 2019-07-13 2007-01-14\n", "14 TIA1702 2020-04-25 2009-09-25\n", "15 OBD8151 2019-04-13 1999-07-04\n", "16 WDC2749 2020-07-15 2002-05-21\n", "17 XJB5149 2018-07-31 2012-07-16\n", "18 CRP8660 2019-09-28 2004-11-26\n", "19 PEQ6155 2021-07-22 2007-02-23\n", "20 PPI8400 2021-11-26 2006-09-18\n", "21 JOA9170 2020-09-09 1999-05-16\n", "22 KNQ4268 2021-03-10 2008-02-12\n", "23 SRU2950 2021-01-22 2006-11-01\n", "24 EHL9301 2019-10-07 2001-03-12\n", "25 CJY6373 2021-10-25 2000-01-07\n", "26 NCL2868 2019-05-16 2005-05-05\n", "27 YWA1319 2019-04-28 2012-11-27\n", "28 CVI8835 2018-12-08 2015-08-19\n", "29 HIR9014 2021-11-10 2014-07-18\n", ".. ... ... ...\n", "945 GSN4940 2018-01-03 2015-07-10\n", "946 EGF8629 2020-04-28 2016-02-27\n", "947 QAO5663 2018-01-30 2014-09-02\n", "948 YXF8421 2018-06-16 2006-09-05\n", "949 VDU5872 2018-01-30 2007-04-04\n", "950 HFZ7313 2018-10-06 2014-08-07\n", "951 JDC8664 2018-08-11 2017-02-04\n", "952 QBV9830 2021-11-19 2015-06-25\n", "953 LFW8264 2021-03-26 2006-07-03\n", "954 EUT2904 2021-12-10 2008-02-23\n", "955 PBW7429 2020-09-08 2017-05-13\n", "956 SGO7528 2020-03-24 1999-03-18\n", "957 IRJ6787 2021-11-13 2013-06-19\n", "958 BKZ8467 2021-11-25 2004-04-06\n", "959 SNZ2467 2021-02-14 2003-07-22\n", "960 PDF9673 2020-01-01 2005-03-27\n", "961 TYL6617 2020-08-21 2014-04-27\n", "962 LIK6490 2020-03-14 2010-03-17\n", "963 EXS9829 2021-05-08 2009-03-10\n", "964 SAC4402 2021-03-06 2004-05-19\n", "965 FND5049 2018-07-25 2012-01-07\n", "966 FLR5428 2018-04-03 2002-09-14\n", "967 CDZ4102 2021-06-03 2009-02-16\n", "968 HTQ6817 2020-03-17 2013-07-25\n", "969 PUN8916 2019-11-26 2013-01-19\n", "970 GJI9724 2018-06-23 2015-06-21\n", "971 ORD4650 2019-08-16 2014-05-11\n", "972 BLV8655 2019-12-24 2014-01-14\n", "973 XBH9030 2020-09-17 2010-02-11\n", "974 BZQ1079 2020-07-22 2003-09-12\n", "\n", "[975 rows x 3 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_driver" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "rel_cols_3 = [\"POLICY_ID\",\"EXPIRY_DATE\",\"START_DATE\",\"LOW_MILEAGE_USE\"]\n", "df_policy = raw_df[rel_cols_3]" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
POLICY_IDEXPIRY_DATESTART_DATELOW_MILEAGE_USE
0NW55678822017-09-192016-09-190
1UR48648042018-06-142017-06-140
2RR85959082016-08-232015-08-241
3RN56406342016-07-242015-07-250
4YY12295302017-01-072016-01-080
5XP34737632018-01-152017-01-150
6US54442692017-07-082016-07-081
7XR19942702017-08-112016-08-110
8VP63685852018-10-132017-10-130
9YP97580062016-06-222015-06-230
10XY68003482017-04-142016-04-140
11ZK69944712018-02-222017-02-220
12ZR44628792018-01-102017-01-100
13ZW42634532017-06-042016-06-040
14UW81765312018-04-102017-04-100
15RW86525382017-11-252016-11-250
16PQ31898502017-04-062016-04-061
17XN90158152018-11-212017-11-211
18SV16590582017-07-232016-07-230
19UQ81633282018-07-112017-07-110
20OT02871502018-04-232017-04-230
21TW21728022016-08-192015-08-200
22PP73602892017-04-172016-04-170
23OY65162602016-04-172015-04-180
24OS67085362018-10-012017-10-010
25QV48421912017-03-062016-03-060
26VT98073372018-01-142017-01-140
27PU54393352017-04-042016-04-040
28OJ54660962017-05-202016-05-200
29RR54471102018-07-112017-07-110
...............
945VR30290922018-06-082017-06-080
946TQ47637652016-03-082015-03-090
947XV09369442018-07-212017-07-210
948XO14202652017-04-032016-04-030
949QP08553062018-06-222017-06-220
950QW36223192016-06-082015-06-090
951US79280332016-06-012015-06-020
952YU22218562018-08-242017-08-240
953YY78623682017-04-252016-04-250
954SP11799292017-08-202016-08-200
955RZ27945322018-10-032017-10-030
956SV18696982016-09-082015-09-090
957VP10262842017-09-222016-09-220
958ZT72143532018-11-232017-11-230
959PZ89271192018-03-102017-03-101
960ZQ99236002018-08-022017-08-020
961OQ20491362017-07-182016-07-180
962VO48610352017-10-042016-10-040
963NU17269362017-02-092016-02-100
964YN82551932016-03-132015-03-140
965NY83669012018-05-162017-05-160
966PZ88623682017-08-092016-08-090
967SS81550482018-05-242017-05-240
968NS65409792018-09-222017-09-220
969NY82204762017-10-252016-10-250
970QY35122502016-07-112015-07-120
971XR92021232016-07-202015-07-210
972TU07698432016-08-202015-08-210
973UU95711262017-04-212016-04-210
974SV27965012016-01-102015-01-100
\n", "

975 rows × 4 columns

\n", "
" ], "text/plain": [ " POLICY_ID EXPIRY_DATE START_DATE LOW_MILEAGE_USE\n", "0 NW5567882 2017-09-19 2016-09-19 0\n", "1 UR4864804 2018-06-14 2017-06-14 0\n", "2 RR8595908 2016-08-23 2015-08-24 1\n", "3 RN5640634 2016-07-24 2015-07-25 0\n", "4 YY1229530 2017-01-07 2016-01-08 0\n", "5 XP3473763 2018-01-15 2017-01-15 0\n", "6 US5444269 2017-07-08 2016-07-08 1\n", "7 XR1994270 2017-08-11 2016-08-11 0\n", "8 VP6368585 2018-10-13 2017-10-13 0\n", "9 YP9758006 2016-06-22 2015-06-23 0\n", "10 XY6800348 2017-04-14 2016-04-14 0\n", "11 ZK6994471 2018-02-22 2017-02-22 0\n", "12 ZR4462879 2018-01-10 2017-01-10 0\n", "13 ZW4263453 2017-06-04 2016-06-04 0\n", "14 UW8176531 2018-04-10 2017-04-10 0\n", "15 RW8652538 2017-11-25 2016-11-25 0\n", "16 PQ3189850 2017-04-06 2016-04-06 1\n", "17 XN9015815 2018-11-21 2017-11-21 1\n", "18 SV1659058 2017-07-23 2016-07-23 0\n", "19 UQ8163328 2018-07-11 2017-07-11 0\n", "20 OT0287150 2018-04-23 2017-04-23 0\n", "21 TW2172802 2016-08-19 2015-08-20 0\n", "22 PP7360289 2017-04-17 2016-04-17 0\n", "23 OY6516260 2016-04-17 2015-04-18 0\n", "24 OS6708536 2018-10-01 2017-10-01 0\n", "25 QV4842191 2017-03-06 2016-03-06 0\n", "26 VT9807337 2018-01-14 2017-01-14 0\n", "27 PU5439335 2017-04-04 2016-04-04 0\n", "28 OJ5466096 2017-05-20 2016-05-20 0\n", "29 RR5447110 2018-07-11 2017-07-11 0\n", ".. ... ... ... ...\n", "945 VR3029092 2018-06-08 2017-06-08 0\n", "946 TQ4763765 2016-03-08 2015-03-09 0\n", "947 XV0936944 2018-07-21 2017-07-21 0\n", "948 XO1420265 2017-04-03 2016-04-03 0\n", "949 QP0855306 2018-06-22 2017-06-22 0\n", "950 QW3622319 2016-06-08 2015-06-09 0\n", "951 US7928033 2016-06-01 2015-06-02 0\n", "952 YU2221856 2018-08-24 2017-08-24 0\n", "953 YY7862368 2017-04-25 2016-04-25 0\n", "954 SP1179929 2017-08-20 2016-08-20 0\n", "955 RZ2794532 2018-10-03 2017-10-03 0\n", "956 SV1869698 2016-09-08 2015-09-09 0\n", "957 VP1026284 2017-09-22 2016-09-22 0\n", "958 ZT7214353 2018-11-23 2017-11-23 0\n", "959 PZ8927119 2018-03-10 2017-03-10 1\n", "960 ZQ9923600 2018-08-02 2017-08-02 0\n", "961 OQ2049136 2017-07-18 2016-07-18 0\n", "962 VO4861035 2017-10-04 2016-10-04 0\n", "963 NU1726936 2017-02-09 2016-02-10 0\n", "964 YN8255193 2016-03-13 2015-03-14 0\n", "965 NY8366901 2018-05-16 2017-05-16 0\n", "966 PZ8862368 2017-08-09 2016-08-09 0\n", "967 SS8155048 2018-05-24 2017-05-24 0\n", "968 NS6540979 2018-09-22 2017-09-22 0\n", "969 NY8220476 2017-10-25 2016-10-25 0\n", "970 QY3512250 2016-07-11 2015-07-12 0\n", "971 XR9202123 2016-07-20 2015-07-21 0\n", "972 TU0769843 2016-08-20 2015-08-21 0\n", "973 UU9571126 2017-04-21 2016-04-21 0\n", "974 SV2796501 2016-01-10 2015-01-10 0\n", "\n", "[975 rows x 4 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_policy" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 975 entries, 0 to 974\n", "Data columns (total 17 columns):\n", "HOUSEHOLD_ID 975 non-null object\n", "DRIVER_ID 975 non-null object\n", "POLICY_ID 975 non-null object\n", "CLAIM_ID 975 non-null object\n", "LOSS_EVENT_TIME 975 non-null object\n", "CLAIM_INIT_TIME 975 non-null object\n", "ODOMETER_AT_LOSS 975 non-null float64\n", "CLAIMS_AT_LOSS_DATE 975 non-null int64\n", "LOSS_LOCATION_LAT 975 non-null float64\n", "LOSS_LOCATION_LONG 975 non-null float64\n", "CLAIM_AMOUNT 975 non-null float64\n", "FLAG_FOR_FRAUD_INV 975 non-null int64\n", "DRIVERS_LICENSE_EXPIRY 975 non-null object\n", "DATE_AT_CURRENT_ADDRESS 975 non-null object\n", "EXPIRY_DATE 975 non-null object\n", "START_DATE 975 non-null object\n", "LOW_MILEAGE_USE 975 non-null int64\n", "dtypes: float64(4), int64(3), object(10)\n", "memory usage: 137.1+ KB\n" ] } ], "source": [ "df_f = pd.merge(df_claim, df_driver, on='DRIVER_ID', how='left')\n", "\n", "df_f = pd.merge(df_f,df_policy, on = 'POLICY_ID', how = 'inner')\n", "df_f.info()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.0 915\n", "0.0 60\n", "Name: SUSPICIOUS_CLAIM_TIME, dtype: int64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Claim within 15 days of policy expiry (date of loss - insurance_policy.expiry)\n", "df_f[\"EXPIRY_DATE\"] = pd.to_datetime(df_f[\"EXPIRY_DATE\"])\n", "df_f[\"LOSS_EVENT_TIME\"] = pd.to_datetime(df_f[\"LOSS_EVENT_TIME\"])\n", "\n", "df_f[\"DAYS_FROM_LOSS\"] = df_f[\"LOSS_EVENT_TIME\"] - df_f[\"EXPIRY_DATE\"]\n", "df_f[\"DAYS_FROM_LOSS\"] = abs(df_f.DAYS_FROM_LOSS.dt.days)\n", "\n", "df_f.loc[df_f['DAYS_FROM_LOSS'] >= 15, 'SUSPICIOUS_CLAIM_TIME'] = 1\n", "df_f.loc[df_f['DAYS_FROM_LOSS'] < 15, 'SUSPICIOUS_CLAIM_TIME'] = 0\n", "df_f[\"SUSPICIOUS_CLAIM_TIME\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# Expired drivers license (if date of loss > insurance_driver.drivers_license_expiry)\n", "df_f[\"DRIVERS_LICENSE_EXPIRY\"] = pd.to_datetime(df_f[\"DRIVERS_LICENSE_EXPIRY\"])\n", "\n", "\n", "df_f[\"DAYS_FROM_L_EXPIRY\"] = df_f[\"DRIVERS_LICENSE_EXPIRY\"] - df_f[\"LOSS_EVENT_TIME\"]\n", "df_f[\"DAYS_FROM_L_EXPIRY\"] = df_f.DAYS_FROM_L_EXPIRY.dt.days\n", "\n", "df_f.loc[df_f['DAYS_FROM_L_EXPIRY'] >= 0, 'EXPIRED_LICENSE'] = 0\n", "df_f.loc[df_f['DAYS_FROM_L_EXPIRY'] < 0, 'EXPIRED_LICENSE'] = 1\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# Days living at current address (date of loss - insurance_driver.date_at_current_address)\n", "\n", "df_f[\"DATE_AT_CURRENT_ADDRESS\"] = pd.to_datetime(df_f[\"DATE_AT_CURRENT_ADDRESS\"])\n", "\n", "\n", "df_f[\"DAYS_AT_ADDRESS\"] = df_f[\"LOSS_EVENT_TIME\"] - df_f[\"DATE_AT_CURRENT_ADDRESS\"]\n", "df_f[\"DAYS_AT_ADDRESS\"] = abs(df_f.DAYS_AT_ADDRESS.dt.days)\n", "\n", "df_f.loc[df_f['DAYS_AT_ADDRESS'] >= 15, 'SUSPICIOUS_LIVING'] = 1\n", "df_f.loc[df_f['DAYS_AT_ADDRESS'] < 15, 'SUSPICIOUS_LIVING'] = 0\n" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "145432.118234 1\n", "234953.253233 1\n", "55515.574648 1\n", "123525.401515 1\n", "198472.566993 1\n", "41161.635017 1\n", "316044.542945 1\n", "343652.573991 1\n", "357034.135714 1\n", "444934.671171 1\n", "47761.055147 1\n", "106608.700207 1\n", "280168.774737 1\n", "97407.709325 1\n", "83715.343421 1\n", "836054.660714 1\n", "225418.465704 1\n", "119793.802198 1\n", "49174.210227 1\n", "154269.516340 1\n", "87012.862810 1\n", "55960.766821 1\n", "340284.423841 1\n", "55199.213855 1\n", "158134.051821 1\n", "148190.243875 1\n", "128341.984375 1\n", "388218.025735 1\n", "38327.581707 1\n", "311332.643229 1\n", " ..\n", "138485.866667 1\n", "355749.475610 1\n", "206160.525547 1\n", "172133.553061 1\n", "39974.457997 1\n", "129429.385563 1\n", "227492.647193 1\n", "581033.280000 1\n", "213403.812808 1\n", "129338.850177 1\n", "433650.444700 1\n", "428016.279255 1\n", "102579.963602 1\n", "924887.694444 1\n", "258935.458015 1\n", "219977.172330 1\n", "167467.854637 1\n", "110428.874687 1\n", "140508.777778 1\n", "189399.690217 1\n", "54898.636569 1\n", "42573.963602 1\n", "153467.345029 1\n", "901214.887681 1\n", "63433.405908 1\n", "98941.082789 1\n", "100299.475188 1\n", "61050.847328 1\n", "54548.889011 1\n", "170432.357027 1\n", "Name: MILES/YEAR, Length: 975, dtype: int64" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#7500/year\n", "df_f[\"START_DATE\"] = pd.to_datetime(df_f[\"START_DATE\"])\n", "#find number of days between policy creation and accident\n", "df_f[\"LENGTH_OF_POLICY\"]=(df_f[\"LOSS_EVENT_TIME\"] - df_f[\"START_DATE\"]).dt.days\n", "\n", "#convert to years\n", "df_f[\"LENGTH_OF_POLICY\"]=df_f[\"LENGTH_OF_POLICY\"]/365\n", "\n", "#divide Odometer at loss by years\n", "df_f[\"MILES/YEAR\"] = df_f[\"ODOMETER_AT_LOSS\"]/df_f[\"LENGTH_OF_POLICY\"]\n", "df_f[\"MILES/YEAR\"].value_counts()\n" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "# Conflict on whether a policyholder with a low mileage discount experienced a loss with high mileage at the point of loss\n", "df_f.loc[df_f[\"MILES/YEAR\"] <7500, 'LOW_MILEAGE_AT_LOSS'] = 1\n", "df_f.loc[df_f[\"MILES/YEAR\"] >=7500, 'LOW_MILEAGE_AT_LOSS'] = 0\n", "df_f.loc[df_f[\"LOW_MILEAGE_USE\"]==df_f[\"LOW_MILEAGE_AT_LOSS\"], 'SUSPICIOUS_MILEAGE'] = 0\n", "df_f.loc[df_f[\"LOW_MILEAGE_USE\"]!= df_f[\"LOW_MILEAGE_AT_LOSS\"], 'SUSPICIOUS_MILEAGE'] = 1\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "df_f.loc[df_f[\"CLAIM_AMOUNT\"] <3000, 'EXCESSIVE_CLAIM_AMOUNT'] = 0\n", "df_f.loc[df_f[\"CLAIM_AMOUNT\"] >=3000, 'EXCESSIVE_CLAIM_AMOUNT'] = 1\n" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "features = ['FLAG_FOR_FRAUD_INV', \n", " 'SUSPICIOUS_MILEAGE',\n", " 'EXPIRED_LICENSE',\n", " 'SUSPICIOUS_CLAIM_TIME',\n", " 'SUSPICIOUS_LIVING',\n", " 'EXCESSIVE_CLAIM_AMOUNT']\n", "df_model = df_f[features]\n", "#ensure all relevant features are integers\n", "df_model[\"SUSPICIOUS_LIVING\"] = df_model[\"SUSPICIOUS_LIVING\"].astype(int)\n", "df_model[\"EXPIRED_LICENSE\"] = df_model[\"EXPIRED_LICENSE\"].astype(int)\n", "df_model[\"SUSPICIOUS_CLAIM_TIME\"] = df_model[\"SUSPICIOUS_CLAIM_TIME\"].astype(int)\n", "df_model[\"SUSPICIOUS_MILEAGE\"] = df_model[\"SUSPICIOUS_MILEAGE\"].astype(int)\n", "df_model[\"EXCESSIVE_CLAIM_AMOUNT\"] = df_model[\"EXCESSIVE_CLAIM_AMOUNT\"].astype(int)\n" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FLAG_FOR_FRAUD_INVODOMETER_AT_LOSSCLAIMS_AT_LOSS_DATELOSS_LOCATION_LATLOSS_LOCATION_LONGCLAIM_AMOUNTLOW_MILEAGE_USEDAYS_FROM_LOSSSUSPICIOUS_CLAIM_TIMEDAYS_FROM_L_EXPIRYEXPIRED_LICENSEDAYS_AT_ADDRESSSUSPICIOUS_LIVINGLENGTH_OF_POLICYMILES/YEARLOW_MILEAGE_AT_LOSSSUSPICIOUS_MILEAGEEXCESSIVE_CLAIM_AMOUNT
00180783.8460461.49209141.857895-87.6750142521.1476270.094903117.0210900.933216894.5465730.0421793487.9560630.9964851.070159205170.3567010.00.0949030.086116
11175504.0463051.75862141.860133-87.67961726429.4802960.088670118.1600990.945813917.9605910.0369463252.4137931.0000000.960490230687.5789970.00.0886700.933498
\n", "
" ], "text/plain": [ " FLAG_FOR_FRAUD_INV ODOMETER_AT_LOSS CLAIMS_AT_LOSS_DATE \\\n", "0 0 180783.846046 1.492091 \n", "1 1 175504.046305 1.758621 \n", "\n", " LOSS_LOCATION_LAT LOSS_LOCATION_LONG CLAIM_AMOUNT LOW_MILEAGE_USE \\\n", "0 41.857895 -87.675014 2521.147627 0.094903 \n", "1 41.860133 -87.679617 26429.480296 0.088670 \n", "\n", " DAYS_FROM_LOSS SUSPICIOUS_CLAIM_TIME DAYS_FROM_L_EXPIRY EXPIRED_LICENSE \\\n", "0 117.021090 0.933216 894.546573 0.042179 \n", "1 118.160099 0.945813 917.960591 0.036946 \n", "\n", " DAYS_AT_ADDRESS SUSPICIOUS_LIVING LENGTH_OF_POLICY MILES/YEAR \\\n", "0 3487.956063 0.996485 1.070159 205170.356701 \n", "1 3252.413793 1.000000 0.960490 230687.578997 \n", "\n", " LOW_MILEAGE_AT_LOSS SUSPICIOUS_MILEAGE EXCESSIVE_CLAIM_AMOUNT \n", "0 0.0 0.094903 0.086116 \n", "1 0.0 0.088670 0.933498 " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_f.groupby(\"FLAG_FOR_FRAUD_INV\", as_index=False).mean()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EXPIRED_LICENSESUSPICIOUS_CLAIM_TIMESUSPICIOUS_LIVINGSUSPICIOUS_MILEAGEEXCESSIVE_CLAIM_AMOUNT
001101
111100
201111
301101
401100
\n", "
" ], "text/plain": [ " EXPIRED_LICENSE SUSPICIOUS_CLAIM_TIME SUSPICIOUS_LIVING \\\n", "0 0 1 1 \n", "1 1 1 1 \n", "2 0 1 1 \n", "3 0 1 1 \n", "4 0 1 1 \n", "\n", " SUSPICIOUS_MILEAGE EXCESSIVE_CLAIM_AMOUNT \n", "0 0 1 \n", "1 0 0 \n", "2 1 1 \n", "3 0 1 \n", "4 0 0 " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#split data into x and y variables\n", "xVar = df_model[[\"EXPIRED_LICENSE\",\"SUSPICIOUS_CLAIM_TIME\",\"SUSPICIOUS_LIVING\",\"SUSPICIOUS_MILEAGE\",\"EXCESSIVE_CLAIM_AMOUNT\"]]\n", "yVar = df_model[\"FLAG_FOR_FRAUD_INV\"]\n", "xVar.head()\n" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(780, 5) (780,)\n", "(195, 5) (195,)\n" ] }, { "ename": "TypeError", "evalue": "'tuple' object is not callable", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0;36m780\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m780\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m195\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m195\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;31m#train model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: 'tuple' object is not callable" ] } ], "source": [ "#split into a test/train set\n", "X_train, X_test, y_train, y_test = train_test_split(xVar, yVar, test_size=0.2)\n", "print (X_train.shape, y_train.shape)\n", "print (X_test.shape, y_test.shape)\n", "(780, 5) (780,)\n", "(195, 5) (195,)\n", "#train model\n", "clf = RandomForestClassifier(n_jobs=2, random_state=0)\n", "\n", "clf.fit(X_train, y_train)\n", "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=2,\n", " oob_score=False, random_state=0, verbose=0, warm_start=False)\n", "#create confusion matrix to gut check model\n", "preds = clf.predict(X_test)\n", "pd.crosstab(y_test, preds, rownames=['Actual Result'], colnames=['Predicted Result'])\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.6", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 1 }