{"paragraphs":[{"title":"Import python libraries and display settings","text":"%python\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport oml\n\npd.set_option('display.max_rows', 500)\npd.set_option('display.max_columns', 500)\npd.set_option('display.width', 1000)","user":"JIE","dateUpdated":"2021-06-21T13:18:33+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_-341059383","id":"20210604-194908_1482195142","dateCreated":"2021-03-31T19:18:01+0000","dateStarted":"2021-06-21T13:18:34+0000","dateFinished":"2021-06-21T13:18:35+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"focus":true,"$$hashKey":"object:40"},{"title":"Load the functions for Weight of Evidence and Information Value Calculation","text":"%python\r\ndef prior_count(DF, col, pos, neg):\r\n labels = DF[col].drop_duplicates().pull()\r\n assert len(labels) == 2\r\n assert pos in labels\r\n assert neg in labels\r\n return col, pos, DF[ DF[col] == pos].shape[0], neg, DF[DF[col] == neg].shape[0]\r\n\r\n\r\ndef mean_encoding_col(DF, col, priors):\r\n vals = DF[col].drop_duplicates().pull()\r\n target, pos, pos_cnt, neg, neg_cnt = priors\r\n res_df = pd.DataFrame(columns = ['VAL', 'MEAN_SCORE'])\r\n GROUP_CNT = DF.crosstab([col, target]).pull()\r\n K = len(vals)\r\n for val in vals:\r\n cond_pos = 0\r\n if GROUP_CNT[ (GROUP_CNT[col]==val) & (GROUP_CNT[target]== pos)].shape[0] > 0:\r\n nomi = (GROUP_CNT[ (GROUP_CNT[col]==val) & (GROUP_CNT[target]== pos)]['count'].values[0] )\r\n cond_pos = nomi*1.0/GROUP_CNT[GROUP_CNT[col]==val]['count'].sum()\r\n res_df = res_df.append({'VAL':val, 'MEAN_SCORE':cond_pos}, ignore_index = True)\r\n return res_df\r\n\r\ndef woe_col(DF, col, priors):\r\n vals = DF[col].drop_duplicates().pull()\r\n target, pos, pos_cnt, neg, neg_cnt = priors\r\n res_df = pd.DataFrame(columns = ['VAL', 'COND_POS', 'COND_NEG', 'WOE'])\r\n \r\n GROUP_CNT = DF.crosstab([col, target]).pull()\r\n K = len(vals)\r\n\r\n for val in vals:\r\n cond_pos = 1.0/(K + pos_cnt)\r\n cond_neg = 1.0/(K + neg_cnt)\r\n \r\n deno = 1\r\n nomi = 1\r\n \r\n if GROUP_CNT[ (GROUP_CNT[col]==val) & (GROUP_CNT[target]== pos)].shape[0] > 0:\r\n nomi = (GROUP_CNT[ (GROUP_CNT[col]==val) & (GROUP_CNT[target]== pos)]['count'].values[0] + 1 )\r\n cond_pos = nomi*1.0/(K + pos_cnt)\r\n \r\n \r\n if GROUP_CNT[ (GROUP_CNT[col]==val) & (GROUP_CNT[target]== neg)].shape[0] > 0:\r\n deno = (GROUP_CNT[ (GROUP_CNT[col]==val) & (GROUP_CNT[target]== neg)]['count'].values[0] + 1)\r\n cond_neg = deno*1.0/(K + neg_cnt)\r\n \r\n \r\n woe = np.log(nomi) - np.log(K + pos_cnt) - (np.log(deno) - np.log(K + neg_cnt))\r\n \r\n res_df = res_df.append({'VAL':val, 'COND_POS':cond_pos, 'COND_NEG':cond_neg, 'WOE':woe}, ignore_index = True)\r\n\r\n return res_df\r\n \r\ndef attach_woe_cat(DF, col, target, priors = None):\r\n if not priors:\r\n priors = prior_count(DF, target, 1, 0)\r\n \r\n woe_df = woe_col(DF, col, priors)\r\n \r\n WOE_DF = oml.push(woe_df)\r\n \r\n DF = DF.merge(WOE_DF, left_on = col, right_on = 'VAL', how = 'inner', suffixes = ['',''])\r\n \r\n DF = DF.drop(['VAL', 'COND_POS', 'COND_NEG'])\r\n \r\n _ = DF.rename({'WOE': col + '_WOE'})\r\n \r\n return DF\r\n\r\n \r\ndef attach_woe_num(DF, col, target, priors = None, bin_num = 20):\r\n if not priors:\r\n priors = prior_count(DF, target, 1, 0)\r\n binned_col = col + '_BIN'\r\n DF = DF.concat({ binned_col: DF[col].cut(bin_num)})\r\n \r\n DF = attach_woe_cat(DF, binned_col, target, priors)\r\n \r\n DF = DF.drop([binned_col])\r\n \r\n return DF\r\n\r\ndef information_value(DF, col, target, priors, bin_num = 20):\r\n if not priors:\r\n priors = prior_count(DF, target, 1, 0)\r\n \r\n if type(DF[col]) == oml.Float:\r\n binned_col = col + '_BIN'\r\n DF = DF.concat({ binned_col: DF[col].cut(bin_num)})\r\n col = binned_col \r\n woe_df = woe_col(DF, col, priors)\r\n \r\n woe_df['IV'] = woe_df['WOE']*(woe_df['COND_POS'] - woe_df['COND_NEG'])\r\n \r\n return woe_df['IV'].sum() \r\n ","user":"JIE","dateUpdated":"2021-06-22T14:50:32+0000","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/undefined","fontSize":9,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_-196697582","id":"20210604-194908_1227414976","dateCreated":"2021-03-31T19:17:55+0000","dateStarted":"2021-06-10T17:39:18+0000","dateFinished":"2021-06-10T17:39:18+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:41"},{"title":"Obtain a proxy object to the Customer Insurance Life Time Value table ","text":"%python\n\nCUST_DF = oml.sync(schema = 'JIE', table = 'CUSTOMER_INSURANCE_LTV')\n","user":"JIE","dateUpdated":"2021-06-07T18:59:47+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_1267726446","id":"20210604-194908_1977992398","dateCreated":"2021-03-31T19:18:43+0000","dateStarted":"2021-06-07T18:59:48+0000","dateFinished":"2021-06-07T18:59:48+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:42"},{"title":"Pick a subset of the columns","text":"%python\n\nCUST_DF = CUST_DF[['CUSTOMER_ID','MARITAL_STATUS', 'STATE', 'GENDER', 'PROFESSION', 'REGION', 'CREDIT_BALANCE', 'LTV_BIN', 'MORTGAGE_AMOUNT', 'BANK_FUNDS', 'NUM_DEPENDENTS', 'INCOME', 'CREDIT_CARD_LIMITS', 'BUY_INSURANCE']]\n","user":"JIE","dateUpdated":"2021-04-07T19:59:42+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_-1643323875","id":"20210604-194908_1877196815","dateCreated":"2021-03-31T19:23:22+0000","dateStarted":"2021-04-07T19:59:43+0000","dateFinished":"2021-04-07T19:59:43+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:43"},{"title":"Overview of the dataset","text":"%python\n\nz.show(CUST_DF.head())","user":"JIE","dateUpdated":"2021-04-12T17:29:40+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":300,"optionOpen":false,"setting":{"table":{"tableGridState":{"columns":[{"name":"MARITAL_STATUS","visible":true,"width":173,"sort":{},"filters":[{}],"pinned":""},{"name":"STATE","visible":true,"width":150,"sort":{},"filters":[{}],"pinned":""},{"name":"CREDIT_BALANCE","visible":true,"width":177,"sort":{},"filters":[{}],"pinned":""},{"name":"CUSTOMER_TENURE","visible":true,"width":189,"sort":{},"filters":[{}],"pinned":""},{"name":"MORTGAGE_AMOUNT","visible":true,"width":199,"sort":{},"filters":[{}],"pinned":""},{"name":"BANK_FUNDS","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"NUM_DEPENDENTS","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"HAS_CHILDREN","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"INCOME","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"CUSTOMER_ID","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"GENDER","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"PROFESSION","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"CREDIT_CARD_LIMITS","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"REGION","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"HOME_OWNERSHIP","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"NUM_ONLINE_TRANS","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"BUY_INSURANCE","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"MONTHLY_CHECKS","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"NUM_TRANS_KIOSK","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"AGE","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"MONEY_MONTLY_OVERDRAWN","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"LTV","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"TOTAL_AUTOM_PAYMENTS","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"NUM_TRANS_TELLER","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"CHECKING_BALANCE","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"NUM_TRANS_ATM","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"LTV_BIN","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"FIRST_NAME","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"NUM_MORTGAGES","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"CAR_OWNERSHIP","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"LAST_NAME","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""}],"scrollFocus":{},"selection":[],"grouping":{"grouping":[],"aggregations":[],"rowExpandedStates":{}},"treeView":{},"pagination":{"paginationCurrentPage":1,"paginationPageSize":250}},"tableColumnTypeState":{"names":{"MARITAL_STATUS":"string","STATE":"string","CREDIT_BALANCE":"string","CUSTOMER_TENURE":"string","MORTGAGE_AMOUNT":"string","BANK_FUNDS":"string","NUM_DEPENDENTS":"string","HAS_CHILDREN":"string","INCOME":"string","CUSTOMER_ID":"string","GENDER":"string","PROFESSION":"string","CREDIT_CARD_LIMITS":"string","REGION":"string","HOME_OWNERSHIP":"string","NUM_ONLINE_TRANS":"string","BUY_INSURANCE":"string","MONTHLY_CHECKS":"string","NUM_TRANS_KIOSK":"string","AGE":"string","MONEY_MONTLY_OVERDRAWN":"string","LTV":"string","TOTAL_AUTOM_PAYMENTS":"string","NUM_TRANS_TELLER":"string","CHECKING_BALANCE":"string","NUM_TRANS_ATM":"string","LTV_BIN":"string","FIRST_NAME":"string","NUM_MORTGAGES":"string","CAR_OWNERSHIP":"string","LAST_NAME":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"MARITAL_STATUS\tSTATE\tCREDIT_BALANCE\tCUSTOMER_TENURE\tMORTGAGE_AMOUNT\tBANK_FUNDS\tNUM_DEPENDENTS\tHAS_CHILDREN\tINCOME\tCUSTOMER_ID\tGENDER\tPROFESSION\tCREDIT_CARD_LIMITS\tREGION\tHOME_OWNERSHIP\tNUM_ONLINE_TRANS\tBUY_INSURANCE\tMONTHLY_CHECKS\tNUM_TRANS_KIOSK\tAGE\tMONEY_MONTLY_OVERDRAWN\tLTV\tTOTAL_AUTOM_PAYMENTS\tNUM_TRANS_TELLER\tCHECKING_BALANCE\tNUM_TRANS_ATM\tLTV_BIN\tFIRST_NAME\tNUM_MORTGAGES\tCAR_OWNERSHIP\tLAST_NAME\nMARRIED\tNY \t63\t3\t1200\t750\t3\t1\t58261\tCU14048 \tM \tPROF-3\t1500\tNorthEast\t1\t1200\tNo\t15\t1\t33\t53.38\t18365.25\t4364\t3\t3879\t3\tMEDIUM\tERIN\t1\t1\tTYSON\nMARRIED\tNV \t0\t3\t1091\t1400\t3\t0\t68632\tCU14049 \tM \tWaiter/Waitress\t1000\tSouthwest\t1\t1091\tYes\t10\t1\t27\t53.02\t25358.0\t510\t3\t25\t2\tHIGH\tDALLAS\t1\t1\tMACDONALD\nSINGLE\tMI \t0\t4\t0\t0\t0\t1\t59234\tCU14050 \tM \tAdministrative Assistant\t600\tMidwest\t0\t0\tNo\t5\t1\t38\t52.97\t15608.5\t1203\t0\t25\t2\tMEDIUM\tDARIN\t0\t1\tKILGORE\nOTHER\tTX \t0\t1\t15000\t11800\t1\t1\t67748\tCU4614 \tF \tClerical\t1600\tSouthwest\t1\t5000\tNo\t2\t3\t37\t54.2\t24137.0\t4096\t2\t25\t5\tHIGH\tREINA\t1\t1\tCRISP\nSINGLE\tMN \t0\t1\t319\t0\t1\t1\t57734\tCU4615 \tM \tProgrammer/Developer\t900\tWest\t1\t319\tNo\t0\t3\t27\t53.06\t20633.5\t507\t1\t25\t0\tMEDIUM\tCOLEMAN\t1\t1\tNICHOLSON\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_-1745860712","id":"20210604-194908_1931555717","dateCreated":"2021-03-31T19:21:39+0000","dateStarted":"2021-04-07T19:59:39+0000","dateFinished":"2021-04-07T19:59:40+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:44"},{"title":"Recode the flag variable BUY_INSURANCE for convenience","text":"%python\n\nCUST_DF = CUST_DF.replace(old = ['Yes'], new = [1.0], default = 0.0, columns = ['BUY_INSURANCE'])\n","user":"JIE","dateUpdated":"2021-04-07T19:59:45+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_-194125035","id":"20210604-194908_2095589116","dateCreated":"2021-04-01T20:17:33+0000","dateStarted":"2021-04-07T19:59:45+0000","dateFinished":"2021-04-07T19:59:46+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:45"},{"title":"Save the subset table","text":"%python\n\ntry:\n oml.drop(table = 'CUST_SUBSET_TBL')\nexcept:\n print(\"No such table\")\nCUST_SUBSET_DF = CUST_DF.materialize(table = 'CUST_SUBSET_TBL')","user":"JIE","dateUpdated":"2021-04-07T19:59:47+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_357269547","id":"20210604-194908_661675547","dateCreated":"2021-04-01T18:59:47+0000","dateStarted":"2021-04-07T19:59:48+0000","dateFinished":"2021-04-07T19:59:49+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:46"},{"title":"Get the proxy object for the subset table","text":"%python\n\nCUST_SUBSET_DF = oml.sync(table = 'CUST_SUBSET_TBL')","user":"JIE","dateUpdated":"2021-06-10T17:39:24+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_-609649101","id":"20210604-194908_1449263426","dateCreated":"2021-04-01T19:19:31+0000","dateStarted":"2021-06-10T17:39:25+0000","dateFinished":"2021-06-10T17:39:26+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:47"},{"title":"Get the categorical columns","text":"%python\n\ncat_cols = CUST_SUBSET_DF.drop(['CUSTOMER_ID']).select_types([oml.core.string.String]).columns\ncat_cols","user":"JIE","dateUpdated":"2021-06-07T18:37:52+0000","config":{"colWidth":6,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"['MARITAL_STATUS', 'STATE', 'GENDER', 'PROFESSION', 'REGION', 'LTV_BIN']\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_1946715905","id":"20210604-194908_749059344","dateCreated":"2021-03-31T19:21:52+0000","dateStarted":"2021-06-07T18:37:54+0000","dateFinished":"2021-06-07T18:37:54+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:48"},{"title":"Get the numerical columns","text":"%python\n\nnum_cols = CUST_SUBSET_DF.drop(['BUY_INSURANCE']).select_types([oml.core.string.Float]).columns\nnum_cols","user":"JIE","dateUpdated":"2021-06-07T18:37:56+0000","config":{"colWidth":6,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"['CREDIT_BALANCE', 'MORTGAGE_AMOUNT', 'BANK_FUNDS', 'NUM_DEPENDENTS', 'INCOME', 'CREDIT_CARD_LIMITS']\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_694252388","id":"20210604-194908_289911230","dateCreated":"2021-03-31T19:30:26+0000","dateStarted":"2021-06-07T18:37:57+0000","dateFinished":"2021-06-07T18:37:57+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:49"},{"title":"Compute WOE values and attach WOE columns for both categorical and numerical columns","text":"%python\n\ntarget = 'BUY_INSURANCE'\npriors = prior_count(CUST_SUBSET_DF, target, 1, 0)\n\nCUST_WOE_DF = CUST_SUBSET_DF\n\nfor col in cat_cols:\n CUST_WOE_DF = attach_woe_cat(CUST_WOE_DF, col, target, priors)\n\nfor col in num_cols:\n CUST_WOE_DF = attach_woe_num(CUST_WOE_DF, col, target, priors)\n","user":"JIE","dateUpdated":"2021-06-04T19:52:03+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_-1408114301","id":"20210604-194908_878880011","dateCreated":"2021-03-31T19:31:31+0000","dateStarted":"2021-06-04T19:52:04+0000","dateFinished":"2021-06-04T19:52:24+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:50"},{"title":"Mean Encoding for MARITAL_STATUS","text":"%python\ntarget = 'BUY_INSURANCE'\n\npriors = prior_count(CUST_SUBSET_DF, target, 1, 0)\n\n \nmean_info = mean_encoding_col(CUST_SUBSET_DF, 'MARITAL_STATUS', priors)\nz.show(mean_info.round(4).sort_values('VAL'))\n","user":"JIE","dateUpdated":"2021-06-07T17:41:25+0000","config":{"colWidth":3,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":244,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"VAL":"string","MEAN_SCORE":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"sql","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"VAL\tMEAN_SCORE\nDIVORCED\t0.3364\nMARRIED\t0.2933\nOTHER\t0.2869\nSINGLE\t0.195\nWIDOWED\t0.3048\n"}]},"interrupted":false,"jobName":"paragraph_1622836358927_-1346429400","id":"20210604-195238_621042714","dateCreated":"2021-06-04T19:52:38+0000","dateStarted":"2021-06-07T17:41:26+0000","dateFinished":"2021-06-07T17:41:28+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:51"},{"title":"WOE values for MARITAL_STATUS","text":"%python\n\ntarget = 'BUY_INSURANCE'\npriors = prior_count(CUST_SUBSET_DF, target, 1, 0)\nwoe_info = woe_col(CUST_SUBSET_DF, 'MARITAL_STATUS', priors)\nz.show(woe_info[['VAL', 'WOE']].round(4).sort_values('VAL'))","user":"JIE","dateUpdated":"2021-06-07T19:07:49+0000","config":{"colWidth":3,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":264,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"VAL":"string","WOE":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"VAL\tWOE\nDIVORCED\t0.313\nMARRIED\t0.1129\nOTHER\t0.098\nSINGLE\t-0.4251\nWIDOWED\t0.1702\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_1537732405","id":"20210604-194908_1926027988","dateCreated":"2021-04-09T18:45:37+0000","dateStarted":"2021-06-07T19:07:50+0000","dateFinished":"2021-06-07T19:07:51+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:52"},{"title":"Mean Encoding for REGION","text":"%python\ntarget = 'BUY_INSURANCE'\n\npriors = prior_count(CUST_SUBSET_DF, target, 1, 0)\n \nmean_info = mean_encoding_col(CUST_SUBSET_DF, 'REGION', priors)\nz.show(mean_info.round(4).sort_values('VAL'))","user":"JIE","dateUpdated":"2021-06-07T19:07:53+0000","config":{"colWidth":3,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":264,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"VAL":"string","MEAN_SCORE":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"sql","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"VAL\tMEAN_SCORE\nMidwest\t0.2737\nNorthEast\t0.2682\nSouth\t0.3066\nSouthwest\t0.2508\nWest\t0.2656\n"}]},"interrupted":false,"jobName":"paragraph_1622837951325_-2134695959","id":"20210604-201911_106258994","dateCreated":"2021-06-04T20:19:11+0000","dateStarted":"2021-06-07T19:07:54+0000","dateFinished":"2021-06-07T19:07:55+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:53"},{"title":"WOE values for REGION ","text":"%python\n\n\ntarget = 'BUY_INSURANCE'\npriors = prior_count(CUST_SUBSET_DF, target, 1, 0)\n\nwoe_info = woe_col(CUST_SUBSET_DF, 'REGION', priors)\nz.show(woe_info[['VAL', 'WOE']].round(4).sort_values('VAL'))","user":"JIE","dateUpdated":"2021-06-10T17:39:47+0000","config":{"colWidth":3,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":300,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"VAL":"string","WOE":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"VAL\tWOE\nMidwest\t0.0166\nNorthEast\t-0.0115\nSouth\t0.1774\nSouthwest\t-0.0997\nWest\t-0.0244\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_385874211","id":"20210604-194908_1929426080","dateCreated":"2021-04-09T18:48:05+0000","dateStarted":"2021-06-10T17:39:48+0000","dateFinished":"2021-06-10T17:39:49+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:54"},{"title":"Imbalanced class distribution Mean Encoding for MARITAL_STATUS","text":"%python\ntarget = 'BUY_INSURANCE'\nPOS_DF = CUST_SUBSET_DF[CUST_SUBSET_DF['BUY_INSURANCE'] == 1].sample(frac = 0.1)\nNEG_DF = CUST_SUBSET_DF[CUST_SUBSET_DF['BUY_INSURANCE'] == 0]\nCOMBINED_DF = NEG_DF.append(POS_DF, all = False)\npriors = prior_count(COMBINED_DF, target, 1, 0)\nmean_info = mean_encoding_col(COMBINED_DF, 'MARITAL_STATUS', priors)\nz.show(mean_info.round(4).sort_values('VAL'))","user":"JIE","dateUpdated":"2021-06-10T17:40:03+0000","config":{"colWidth":3,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":218,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"VAL":"string","MEAN_SCORE":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"sql","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"VAL\tMEAN_SCORE\nDIVORCED\t0.0454\nMARRIED\t0.037\nOTHER\t0.0225\nSINGLE\t0.022\nWIDOWED\t0.0365\n"}]},"interrupted":false,"jobName":"paragraph_1622838058707_822195695","id":"20210604-202058_2126723117","dateCreated":"2021-06-04T20:20:58+0000","dateStarted":"2021-06-10T17:40:04+0000","dateFinished":"2021-06-10T17:40:06+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:55"},{"title":"Imbalanced class distribution Mean Encoding for REGION","text":"%python\ntarget = 'BUY_INSURANCE'\n\npriors = prior_count(COMBINED_DF, target, 1, 0)\nmean_info = mean_encoding_col(COMBINED_DF, 'REGION', priors)\nz.show(mean_info.round(4))","user":"JIE","dateUpdated":"2021-06-10T17:35:46+0000","config":{"colWidth":3,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":224,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"VAL":"string","MEAN_SCORE":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"sql","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"VAL\tMEAN_SCORE\nSouth\t0.0394\nNorthEast\t0.0348\nMidwest\t0.0343\nWest\t0.0329\nSouthwest\t0.031\n"}]},"interrupted":false,"jobName":"paragraph_1623081137525_-2091826265","id":"20210607-155217_1112003943","dateCreated":"2021-06-07T15:52:17+0000","dateStarted":"2021-06-07T17:21:53+0000","dateFinished":"2021-06-07T17:21:54+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:56"},{"title":"Class distribution of the original data","text":"%python\n\nz.show(CUST_SUBSET_DF.crosstab(['BUY_INSURANCE']))","user":"JIE","dateUpdated":"2021-06-22T14:44:39+0000","config":{"colWidth":3,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"multiBarChart","height":366,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"BUY_INSURANCE":"string","count":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false},"multiBarChart":{"rotate":{"degree":"-45"},"xLabelStatus":"default"}},"commonSetting":{},"keys":[{"name":"BUY_INSURANCE","index":0,"aggr":"sum"}],"groups":[],"values":[{"name":"count","index":1,"aggr":"sum"}]},"helium":{}}},"editorSetting":{"language":"sql","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"BUY_INSURANCE\tcount\n0\t10127\n1\t3753\n"}]},"interrupted":false,"jobName":"paragraph_1623081335787_-2079865104","id":"20210607-155535_1775557918","dateCreated":"2021-06-07T15:55:35+0000","dateStarted":"2021-06-07T17:20:43+0000","dateFinished":"2021-06-07T17:20:44+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:57"},{"title":"Removed 90% of the positive cases","text":"%python\n\nz.show(COMBINED_DF.crosstab(['BUY_INSURANCE']))","user":"JIE","dateUpdated":"2021-06-22T14:44:39+0000","config":{"colWidth":3,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"multiBarChart","height":364,"optionOpen":false,"setting":{"multiBarChart":{"rotate":{"degree":"-45"},"xLabelStatus":"default"}},"commonSetting":{},"keys":[{"name":"BUY_INSURANCE","index":0,"aggr":"sum"}],"groups":[],"values":[{"name":"count","index":1,"aggr":"sum"}]},"helium":{}}},"editorSetting":{"language":"sql","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"BUY_INSURANCE\tcount\n0\t10127\n1\t371\n"}]},"interrupted":false,"jobName":"paragraph_1623081280038_32874495","id":"20210607-155440_349550821","dateCreated":"2021-06-07T15:54:40+0000","dateStarted":"2021-06-07T17:28:03+0000","dateFinished":"2021-06-07T17:28:05+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:58"},{"title":"Comparison of the WOE on the original dataset and the dataset with imbalanced class","text":"%python\ntarget = 'BUY_INSURANCE'\n\npriors = prior_count(CUST_SUBSET_DF, target, 1, 0)\nwoe_info = woe_col(CUST_SUBSET_DF, 'REGION', priors)\n\n\npriors = prior_count(COMBINED_DF, target, 1, 0)\nwoe_info_imb = woe_col(COMBINED_DF, 'REGION', priors)\n\nwoe_info_comp = woe_info.merge(woe_info_imb, on = ['VAL'], suffixes = [\"_ORIG\", \"_IMB\"])\nz.show(woe_info_comp[['VAL', 'WOE_ORIG', 'WOE_IMB'] ].round(4).sort_values('VAL'))","user":"JIE","dateUpdated":"2021-06-15T15:35:39+0000","config":{"colWidth":6,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":300,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"VAL":"string","WOE_IMB":"string","WOE_ORIG":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"sql","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"VAL\tWOE_ORIG\tWOE_IMB\nMidwest\t0.0166\t0.0735\nNorthEast\t-0.0115\t0.0703\nSouth\t0.1774\t0.1031\nSouthwest\t-0.0997\t-0.151\nWest\t-0.0244\t-0.1538\n"}]},"interrupted":false,"jobName":"paragraph_1623346501324_853292724","id":"20210610-173501_1565199805","dateCreated":"2021-06-10T17:35:01+0000","dateStarted":"2021-06-10T17:48:06+0000","dateFinished":"2021-06-10T17:48:09+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:59"},{"title":"Comparison of the WOE on the original dataset and the dataset with imbalanced class","text":"%python\ntarget = 'BUY_INSURANCE'\n\npriors = prior_count(CUST_SUBSET_DF, target, 1, 0)\nwoe_info = woe_col(CUST_SUBSET_DF, 'MARITAL_STATUS', priors)\n\n\npriors = prior_count(COMBINED_DF, target, 1, 0)\nwoe_info_imb = woe_col(COMBINED_DF, 'MARITAL_STATUS', priors)\n\nwoe_info_comp = woe_info.merge(woe_info_imb, on = ['VAL'], suffixes = [\"_ORIG\", \"_IMB\"])\nz.show(woe_info_comp[['VAL', 'WOE_ORIG', 'WOE_IMB'] ].round(4).sort_values('VAL'))","user":"JIE","dateUpdated":"2021-06-15T15:35:43+0000","config":{"colWidth":6,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":300,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"VAL":"string","WOE_ORIG":"string","WOE_IMB":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"sql","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"VAL\tWOE_ORIG\tWOE_IMB\nDIVORCED\t0.313\t0.3233\nMARRIED\t0.1129\t0.1086\nOTHER\t0.098\t-0.0189\nSINGLE\t-0.4251\t-0.4228\nWIDOWED\t0.1702\t0.1457\n"}]},"interrupted":false,"jobName":"paragraph_1623346857515_-1576800998","id":"20210610-174057_1128802774","dateCreated":"2021-06-10T17:40:57+0000","dateStarted":"2021-06-10T17:48:35+0000","dateFinished":"2021-06-10T17:48:37+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:60"},{"title":"Compute Information Value and attach IV columns for both categorical and numerical columns","text":"%python\n\ndef iv_level(iv):\n level = ''\n if iv < 0.02:\n level = 'unpredictive'\n elif iv < 0.1:\n level = 'weak'\n elif iv < 0.3:\n level = 'medium'\n else:\n level = 'strong'\n return level\n\ntarget = 'BUY_INSURANCE'\npriors = prior_count(CUST_SUBSET_DF, target, 1, 0)\n\nres_df = pd.DataFrame(columns = ['COLUMN', 'IV', 'LEVEL'])\n\nfor col in cat_cols:\n iv = information_value(CUST_SUBSET_DF, col, target, priors)\n level = iv_level(iv)\n res_df = res_df.append({'COLUMN':col, 'IV': iv, 'LEVEL': level}, ignore_index = True)\n print(col + \" IV: %.4f Level: %s\" % (iv, level))\n\n\nfor col in num_cols:\n iv = information_value(CUST_SUBSET_DF, col, target, priors)\n level = iv_level(iv)\n res_df = res_df.append({'COLUMN':col, 'IV': iv, 'LEVEL': level}, ignore_index = True)\n print(col + \" IV: %.4f Level %s\" % (iv, level))\n\n\nz.show(res_df.sort_values(by = 'IV').round(4))","user":"JIE","dateUpdated":"2021-06-07T18:38:01+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":300,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"COLUMN":"string","IV":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}},"1":{"graph":{"mode":"table","height":300,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"COLUMN":"string","IV":"string","LEVEL":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false},"multiBarChart":{"rotate":{"degree":"-45"},"xLabelStatus":"default"}},"commonSetting":{},"keys":[{"name":"COLUMN","index":0,"aggr":"sum"}],"groups":[],"values":[{"name":"IV","index":1,"aggr":"sum"}]},"helium":{}}},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"MARITAL_STATUS IV: 0.0894 Level: weak\nSTATE IV: 0.0450 Level: weak\nGENDER IV: 0.0830 Level: weak\nPROFESSION IV: 0.2603 Level: medium\nREGION IV: 0.0034 Level: unpredictive\nLTV_BIN IV: 0.0097 Level: unpredictive\nCREDIT_BALANCE IV: 0.0261 Level weak\nMORTGAGE_AMOUNT IV: 0.0264 Level weak\nBANK_FUNDS IV: 0.0626 Level weak\nNUM_DEPENDENTS IV: 0.2476 Level medium\nINCOME IV: 0.0146 Level unpredictive\nCREDIT_CARD_LIMITS IV: 0.0674 Level weak\n"},{"type":"TABLE","data":"COLUMN\tIV\tLEVEL\nREGION\t0.0034\tunpredictive\nLTV_BIN\t0.0097\tunpredictive\nINCOME\t0.0146\tunpredictive\nCREDIT_BALANCE\t0.0261\tweak\nMORTGAGE_AMOUNT\t0.0264\tweak\nSTATE\t0.045\tweak\nBANK_FUNDS\t0.0626\tweak\nCREDIT_CARD_LIMITS\t0.0674\tweak\nGENDER\t0.083\tweak\nMARITAL_STATUS\t0.0894\tweak\nNUM_DEPENDENTS\t0.2476\tmedium\nPROFESSION\t0.2603\tmedium\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_-1668255382","id":"20210604-194908_1600532799","dateCreated":"2021-04-06T18:32:53+0000","dateStarted":"2021-06-07T18:38:02+0000","dateFinished":"2021-06-07T18:38:11+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:61"},{"title":"Rank Information value","text":"%python\n\nz.show(res_df.round(4).sort_values('IV', ascending = False))","user":"JIE","dateUpdated":"2021-06-22T14:44:39+0000","config":{"colWidth":10,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"multiBarChart","height":300,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"COLUMN":"string","IV":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false},"multiBarChart":{"rotate":{"degree":"-45"},"xLabelStatus":"default"}},"commonSetting":{},"keys":[{"name":"COLUMN","index":0,"aggr":"sum"}],"groups":[],"values":[{"name":"IV","index":1,"aggr":"sum"}]},"helium":{}}},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"COLUMN\tIV\tLEVEL\nPROFESSION\t0.2603\tmedium\nNUM_DEPENDENTS\t0.2476\tmedium\nMARITAL_STATUS\t0.0894\tweak\nGENDER\t0.083\tweak\nCREDIT_CARD_LIMITS\t0.0674\tweak\nBANK_FUNDS\t0.0626\tweak\nSTATE\t0.045\tweak\nMORTGAGE_AMOUNT\t0.0264\tweak\nCREDIT_BALANCE\t0.0261\tweak\nINCOME\t0.0146\tunpredictive\nLTV_BIN\t0.0097\tunpredictive\nREGION\t0.0034\tunpredictive\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_-1505289848","id":"20210604-194908_2114821413","dateCreated":"2021-04-06T18:57:45+0000","dateStarted":"2021-06-07T18:38:37+0000","dateFinished":"2021-06-07T18:38:38+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:62"},{"title":"Drop the columns with information value less than 0.02","text":"%python\n\nCUST_WOE_DF = CUST_WOE_DF.drop(['REGION_WOE', 'LTV_BIN_WOE'])","user":"JIE","dateUpdated":"2021-04-07T20:05:31+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_-1510773164","id":"20210604-194908_1787028042","dateCreated":"2021-04-01T20:39:23+0000","dateStarted":"2021-04-07T20:05:32+0000","dateFinished":"2021-04-07T20:05:32+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:63"},{"title":"Check the table with WOE values attached","text":"%python\n\nz.show(CUST_WOE_DF.head().round(4))","user":"JIE","dateUpdated":"2021-04-07T20:05:34+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":300,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"CUSTOMER_ID":"string","MARITAL_STATUS":"string","STATE":"string","GENDER":"string","PROFESSION":"string","REGION":"string","CREDIT_BALANCE":"string","LTV_BIN":"string","MORTGAGE_AMOUNT":"string","BANK_FUNDS":"string","NUM_DEPENDENTS":"string","INCOME":"string","CREDIT_CARD_LIMITS":"string","BUY_INSURANCE":"string","MARITAL_STATUS_WOE":"string","STATE_WOE":"string","GENDER_WOE":"string","PROFESSION_WOE":"string","CREDIT_BALANCE_BIN_WOE":"string","MORTGAGE_AMOUNT_BIN_WOE":"string","BANK_FUNDS_BIN_WOE":"string","NUM_DEPENDENTS_BIN_WOE":"string","INCOME_BIN_WOE":"string","CREDIT_CARD_LIMITS_BIN_WOE":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"CUSTOMER_ID\tMARITAL_STATUS\tSTATE\tGENDER\tPROFESSION\tREGION\tCREDIT_BALANCE\tLTV_BIN\tMORTGAGE_AMOUNT\tBANK_FUNDS\tNUM_DEPENDENTS\tINCOME\tCREDIT_CARD_LIMITS\tBUY_INSURANCE\tMARITAL_STATUS_WOE\tSTATE_WOE\tGENDER_WOE\tPROFESSION_WOE\tCREDIT_BALANCE_BIN_WOE\tMORTGAGE_AMOUNT_BIN_WOE\tBANK_FUNDS_BIN_WOE\tNUM_DEPENDENTS_BIN_WOE\tINCOME_BIN_WOE\tCREDIT_CARD_LIMITS_BIN_WOE\nCU10044 \tOTHER\tDC \tF \tIT Staff\tNorthEast\t0\tHIGH\t15000\t14000\t2\t64744\t800\t0\t0.098\t0.0537\t0.3961\t0.2609\t0.0238\t0.0735\t0.6096\t-0.0569\t0.0035\t-0.2053\nCU1636 \tOTHER\tMI \tF \tPROF-25\tMidwest\t0\tHIGH\t15000\t11200\t1\t61032\t900\t1\t0.098\t-0.0445\t0.3961\t0.2479\t0.0238\t0.0735\t0.6096\t0.266\t-0.1101\t-0.2053\nCU2031 \tOTHER\tMI \tF \tIT Staff\tMidwest\t137200\tMEDIUM\t40000\t25800\t5\t60394\t1500\t0\t0.098\t-0.0445\t0.3961\t0.2609\t-0.8562\t0.0735\t0.3721\t0.2165\t-0.1101\t0.0007\nCU5561 \tOTHER\tMI \tF \tIT Staff\tMidwest\t0\tHIGH\t90000\t24000\t0\t62264\t900\t1\t0.098\t-0.0445\t0.3961\t0.2609\t0.0238\t1.683\t0.3721\t-1.427\t-0.1101\t-0.2053\nCU5088 \tOTHER\tMI \tF \tIT Staff\tMidwest\t0\tHIGH\t40000\t11050\t3\t63276\t900\t1\t0.098\t-0.0445\t0.3961\t0.2609\t0.0238\t0.0735\t0.6096\t0.1057\t0.0035\t-0.2053\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_702078048","id":"20210604-194908_475069966","dateCreated":"2021-03-31T19:32:37+0000","dateStarted":"2021-04-07T20:05:35+0000","dateFinished":"2021-04-07T20:05:36+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:64"},{"title":"Get the columns with WOE values","text":"%python\n\nwoe_cols = [ col for col in CUST_WOE_DF.columns if col.endswith('WOE') ]\n","user":"JIE","dateUpdated":"2021-04-07T16:49:20+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_-143749313","id":"20210604-194908_1917967218","dateCreated":"2021-03-31T20:11:36+0000","dateStarted":"2021-04-07T16:49:20+0000","dateFinished":"2021-04-07T16:49:20+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:65"},{"title":"Create a table that contains all WOE columns only","text":"%python\n\n\ntry:\n oml.drop(table = 'CUST_WOE_TBL')\nexcept:\n print(\"No such table\")\n_ = CUST_WOE_DF[['CUSTOMER_ID', 'BUY_INSURANCE'] + woe_cols].materialize(table = 'CUST_WOE_TBL')","user":"JIE","dateUpdated":"2021-04-07T16:49:20+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_1217631508","id":"20210604-194908_1613206696","dateCreated":"2021-03-31T20:16:36+0000","dateStarted":"2021-04-07T16:49:21+0000","dateFinished":"2021-04-07T16:49:23+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:66"},{"title":"Get the proxy object of the WOE value table","text":"%python\n\nCUST_WOE_DF = oml.sync(table = 'CUST_WOE_TBL')","user":"JIE","dateUpdated":"2021-06-21T13:18:43+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_741690473","id":"20210604-194908_365149012","dateCreated":"2021-03-31T20:24:25+0000","dateStarted":"2021-06-21T13:18:44+0000","dateFinished":"2021-06-21T13:18:45+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:67"},{"title":"Overview of the WOE value table","text":"%python\n\nz.show(CUST_WOE_DF.head().round(4))\n","user":"JIE","dateUpdated":"2021-04-07T20:06:07+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":300,"optionOpen":false,"setting":{"table":{"tableGridState":{},"tableColumnTypeState":{"names":{"CUSTOMER_ID":"string","BUY_INSURANCE":"string","MARITAL_STATUS_WOE":"string","STATE_WOE":"string","GENDER_WOE":"string","PROFESSION_WOE":"string","CREDIT_BALANCE_BIN_WOE":"string","MORTGAGE_AMOUNT_BIN_WOE":"string","BANK_FUNDS_BIN_WOE":"string","NUM_DEPENDENTS_BIN_WOE":"string","INCOME_BIN_WOE":"string","CREDIT_CARD_LIMITS_BIN_WOE":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"CUSTOMER_ID\tBUY_INSURANCE\tMARITAL_STATUS_WOE\tSTATE_WOE\tGENDER_WOE\tPROFESSION_WOE\tCREDIT_BALANCE_BIN_WOE\tMORTGAGE_AMOUNT_BIN_WOE\tBANK_FUNDS_BIN_WOE\tNUM_DEPENDENTS_BIN_WOE\tINCOME_BIN_WOE\tCREDIT_CARD_LIMITS_BIN_WOE\nCU2711 \t0\t0.098\t-0.0445\t-0.2111\t-0.1193\t0.0238\t0.0735\t0.4351\t0.266\t-0.2635\t0.4372\nCU3853 \t1\t0.098\t-0.0629\t0.3961\t0.2609\t0.0238\t0.0735\t0.4351\t0.266\t0.0035\t-0.2053\nCU5582 \t0\t0.098\t0.2924\t0.3961\t0.2609\t0.0238\t0.0735\t0.4351\t0.266\t0.2553\t0.9896\nCU3619 \t1\t0.098\t0.1044\t0.3961\t0.3981\t0.0238\t-0.06\t0.4351\t0.2165\t0.276\t0.0814\nCU10412 \t0\t0.098\t0.1044\t0.3961\t0.2609\t-1.0946\t-0.802\t0.4351\t0.1057\t0.276\t0.0814\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_1412195729","id":"20210604-194908_251172716","dateCreated":"2021-03-31T20:25:22+0000","dateStarted":"2021-04-07T20:06:06+0000","dateFinished":"2021-04-07T20:06:06+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:68"},{"title":"Split the table into train/test, prepared for classification task","text":"%python\n\ndat = CUST_WOE_DF.split(seed = 1)\n\ntrain_x = dat[0].drop(['BUY_INSURANCE'])\ntrain_y = dat[0]['BUY_INSURANCE']\ntest_x = dat[1]\ntest_y = dat[1]['BUY_INSURANCE']","user":"JIE","dateUpdated":"2021-04-08T14:57:40+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_-700866983","id":"20210604-194908_1913538442","dateCreated":"2021-03-31T20:25:35+0000","dateStarted":"2021-04-08T14:57:40+0000","dateFinished":"2021-04-08T14:57:42+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:69"},{"title":"Fit Generalized Linear Model","text":"%python\n\nsetting = dict()\nglm_mod = oml.glm(\"classification\", **setting)\nglm_mod.fit(train_x, train_y, case_id = 'CUSTOMER_ID')","user":"JIE","dateUpdated":"2021-04-08T14:57:44+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"\nAlgorithm Name: Generalized Linear Model\n\nMining Function: CLASSIFICATION\n\nTarget: BUY_INSURANCE\n\nSettings: \n setting name setting value\n0 ALGO_NAME ALGO_GENERALIZED_LINEAR_MODEL\n1 CLAS_WEIGHTS_BALANCED OFF\n2 GLMS_CONF_LEVEL .95\n3 GLMS_FTR_GENERATION GLMS_FTR_GENERATION_DISABLE\n4 GLMS_FTR_SELECTION GLMS_FTR_SELECTION_DISABLE\n5 ODMS_DETAILS ODMS_ENABLE\n6 ODMS_MISSING_VALUE_TREATMENT ODMS_MISSING_VALUE_AUTO\n7 ODMS_SAMPLING ODMS_SAMPLING_DISABLE\n8 PREP_AUTO ON\n\nComputed Settings: \n setting name setting value\n0 GLMS_CONV_TOLERANCE .0000050000000000000004\n1 GLMS_NUM_ITERATIONS 30\n2 GLMS_RIDGE_REGRESSION GLMS_RIDGE_REG_DISABLE\n3 GLMS_SOLVER GLMS_SOLVER_CHOL\n\nGlobal Statistics: \n attribute name attribute value\n0 AIC_INTERCEPT 11346.7\n1 AIC_MODEL 10457.3\n2 CONVERGED YES\n3 DEPENDENT_MEAN 0.267746\n4 ITERATIONS 5\n5 LR_CHI_SQ 909.422\n6 LR_CHI_SQ_P_VALUE 0\n7 LR_DF 10\n8 NEG2_LL_INTERCEPT 11344.7\n9 NEG2_LL_MODEL 10435.3\n10 NUM_PARAMS 11\n11 NUM_ROWS 9763\n12 PCT_CORRECT 0.730923\n13 PCT_INCORRECT 0.269077\n14 PCT_TIED 0\n15 PSEUDO_R_SQ_CS 0.088943\n16 PSEUDO_R_SQ_N 0.129439\n17 RANK_DEFICIENCY 0\n18 SC_INTERCEPT 11353.9\n19 SC_MODEL 10536.3\n20 VALID_COVARIANCE_MATRIX YES\n\nAttributes: \nBANK_FUNDS_BIN_WOE\nCREDIT_BALANCE_BIN_WOE\nCREDIT_CARD_LIMITS_BIN_WOE\nGENDER_WOE\nINCOME_BIN_WOE\nMARITAL_STATUS_WOE\nMORTGAGE_AMOUNT_BIN_WOE\nNUM_DEPENDENTS_BIN_WOE\nPROFESSION_WOE\nSTATE_WOE\n\nPartition: NO\n\nCoefficients: \n\n NONREFERENCE attribute name attribute value coefficient std error t value p value significance code\n0 1 (Intercept) None -1.018448 0.024590 1715.366512 0.000000e+00 ***\n1 1 BANK_FUNDS_BIN_WOE None 0.774955 0.106323 53.124648 3.130397e-13 ***\n2 1 CREDIT_BALANCE_BIN_WOE None 1.238533 0.181091 46.775792 7.959053e-12 ***\n3 1 CREDIT_CARD_LIMITS_BIN_WOE None 0.382300 0.095618 15.985625 6.382528e-05 ***\n4 1 GENDER_WOE None 0.238381 0.096658 6.082220 1.365491e-02 *\n5 1 INCOME_BIN_WOE None 0.525478 0.201209 6.820457 9.011949e-03 **\n6 1 MARITAL_STATUS_WOE None 0.317487 0.095888 10.962875 9.295550e-04 ***\n7 1 MORTGAGE_AMOUNT_BIN_WOE None -0.301905 0.170277 3.143604 7.622516e-02 .\n8 1 NUM_DEPENDENTS_BIN_WOE None 0.601325 0.065160 85.165201 2.744497e-20 ***\n9 1 PROFESSION_WOE None 0.769723 0.057832 177.149185 2.031870e-40 ***\n10 1 STATE_WOE None 0.856382 0.121559 49.631971 1.854666e-12 ***\n\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 '\n\nFit Details: \n\n name value\n0 AIC_INTERCEPT 11346.676632\n1 AIC_MODEL 10457.254569\n2 DEPENDENT_MEAN 0.267746\n3 ITERATIONS 5.000000\n4 LR_CHI_SQ 909.422063\n5 LR_CHI_SQ_P_VALUE 0.000000\n6 LR_DF 10.000000\n7 MODEL_CONVERGED 1.000000\n8 NEG2_LL_INTERCEPT 11344.676632\n9 NEG2_LL_MODEL 10435.254569\n10 NUM_PARAMS 11.000000\n11 NUM_ROWS 9763.000000\n12 PCT_CORRECT 0.730923\n13 PCT_INCORRECT 0.269077\n14 PCT_TIED 0.000000\n15 PSEUDO_R_SQ_CS 0.088943\n16 PSEUDO_R_SQ_N 0.129439\n17 RANK_DEFICIENCY 0.000000\n18 SC_INTERCEPT 11353.862987\n19 SC_MODEL 10536.304474\n20 VALID_COVARIANCE_MATRIX 1.000000\n\nRank: \n\n11\n\nDeviance: \n\n10435.254569\n\nAIC: \n\n10457.254569\n\nNull Deviance: \n\n11344.676632\n\nDF Residual: \n\n9752.0\n\nDF Null: \n\n9762.0\n\nConverged: \n\nTrue\n\n\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_-659298135","id":"20210604-194908_785387447","dateCreated":"2021-03-31T20:34:22+0000","dateStarted":"2021-04-08T14:57:45+0000","dateFinished":"2021-04-08T14:57:50+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:70"},{"title":"Prepare a table that consists of customer ID, prediction results, probability and the target","text":"%python\n\nGLM_WOE_RES_DF = glm_mod.predict(test_x, \n supplemental_cols = test_x[['CUSTOMER_ID', 'BUY_INSURANCE']])\nGLM_WOE_RES_PROB = glm_mod.predict_proba(test_x, supplemental_cols = test_x['CUSTOMER_ID'])\nGLM_WOE_RES_DF = GLM_WOE_RES_DF.merge(GLM_WOE_RES_PROB, how = \"inner\", on = 'CUSTOMER_ID', suffixes = [\"\", \"\"])\nGLM_WOE_RES_DF = GLM_WOE_RES_DF.materialize()\n\nz.show(GLM_WOE_RES_DF.head().round(4)) ","user":"JIE","dateUpdated":"2021-04-08T14:58:17+0000","config":{"colWidth":7,"fontSize":9,"enabled":true,"results":{"0":{"graph":{"mode":"table","height":300,"optionOpen":false,"setting":{"table":{"tableGridState":{"columns":[{"name":"CUSTOMER_ID","visible":true,"width":220,"sort":{},"filters":[{}],"pinned":""},{"name":"BUY_INSURANCE","visible":true,"width":196,"sort":{},"filters":[{}],"pinned":""},{"name":"PREDICTION","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"PROBABILITY_OF_0","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""},{"name":"PROBABILITY_OF_1","visible":true,"width":"*","sort":{},"filters":[{}],"pinned":""}],"scrollFocus":{},"selection":[],"grouping":{"grouping":[],"aggregations":[],"rowExpandedStates":{}},"treeView":{},"pagination":{"paginationCurrentPage":1,"paginationPageSize":250}},"tableColumnTypeState":{"names":{"CUSTOMER_ID":"string","BUY_INSURANCE":"string","PREDICTION":"string","PROBABILITY_OF_0":"string","PROBABILITY_OF_1":"string"},"updated":false},"tableOptionSpecHash":"[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]","tableOptionValue":{"useFilter":false,"showPagination":false,"showAggregationFooter":false},"updated":false,"initialized":false}},"commonSetting":{}}}},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TABLE","data":"CUSTOMER_ID\tBUY_INSURANCE\tPREDICTION\tPROBABILITY_OF_0\tPROBABILITY_OF_1\nCU6461 \t0\t0\t0.9937\t0.0063\nCU896 \t0\t0\t0.9936\t0.0064\nCU2647 \t0\t0\t0.9931\t0.0069\nCU122 \t0\t0\t0.9928\t0.0072\nCU1805 \t1\t0\t0.9913\t0.0087\n"}]},"interrupted":false,"jobName":"paragraph_1622836148862_41238867","id":"20210604-194908_650188834","dateCreated":"2021-03-31T20:45:15+0000","dateStarted":"2021-04-08T14:57:55+0000","dateFinished":"2021-04-08T14:57:57+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:71"},{"title":"Load the function for visualizing the classification metric","text":"%python\n\n\ndef pred_plot(pred_new, prediction_col, probability_col, target, name, settings): \n import numpy as np\n import matplotlib\n import matplotlib.pyplot as plt\n\n from sklearn.metrics import roc_curve\n from sklearn.metrics import roc_auc_score,auc\n\n conf_matrix = pred_new.crosstab(target,prediction_col,pivot=True)\n # Statistics\n cf_local = conf_matrix.pull()\n TN = cf_local.iloc[0,1]\n TP = cf_local.iloc[1,2]\n FP = cf_local.iloc[0,2]\n FN = cf_local.iloc[1,1]\n TPR = TP/(TP+FN)\n FPR = FP/(FP+TN)\n TNR = TN/(TN+FP)\n FNR = FN/(FN+TP)\n Precision = TP/(TP+FP)\n Accuracy = (TP+TN)/(TP+TN+FP+FN)\n NPV = TN/(FN+TN)\n DetectionRate = TN/(FN+TN)\n BalancedAccuracy = (TPR+TNR)/2\n # Estimated AUC via Triangle (not very precise) could be\n # AUC = (1/2)*FPR*TPR + (1/2)*(1-FPR)*(1-TPR) + (1-FPR)*TPR\n \n # Compute real AUC using roc_curve by loading the\n # data locally\n pred_local = pred_new.sample(frac = 0.1).pull()\n fpr, tpr, _ = roc_curve(pred_local[[target]],pred_local[[probability_col]])\n AUC = auc(fpr, tpr)\n \n F1Score = 2*Precision*TPR/(Precision+TPR)\n MathewsCorrCoef = ((TP*TN)-(FP*FN))/((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))**0.5\n # Store all statistics to export\n statistics = {'Algorithm' : name,\n 'Algorithm_setting' : settings,\n 'TN' : TN,\n 'TP' : TP,\n 'FP' : FP,\n 'FN' : FN,\n 'TPR' : TPR,\n 'FPR' : FPR,\n 'TNR' : TNR,\n 'FNR' : FNR,\n 'Precision' : Precision,\n 'Accuracy' : Accuracy,\n 'NPV' : NPV,\n 'DetectionRate' : DetectionRate,\n 'BalancedAccuracy' : BalancedAccuracy,\n 'AUC' : AUC,\n 'F1Score' : F1Score,\n 'MathewsCorrCoef' : MathewsCorrCoef\n }\n # Nice round stats for printing to screen\n TOTAL = TP+TN+FP+FN\n TN_P = (TN/TOTAL*100).round(2)\n FP_P = (FP/TOTAL*100).round(2)\n FN_P = (FN/TOTAL*100).round(2)\n TP_P = (TP/TOTAL*100).round(2)\n # # # Print the output nicely on Zeppelin native Table\n # if table:\n # print(\"%table CONFUSION MATRIX\\tPREDICTED 0\\tPREDICTED 1\\nACTUAL 0\\t\"+\n # \"True Negative: \"+str(TN)+\" (\"+str(TN_P)+\"%)\\t\"+\n # \"False Positive: \"+str(FP)+\" (\"+str(FP_P)+\"%)\\nACTUAL 1\\t\"+\n # \"False Negative: \"+str(FN)+\" (\"+str(FN_P)+\"%)\\t\"+\n # \"True Positive: \"+str(TP)+\" (\"+str(TP_P)+\"%)\\n\"+\n # \"Accuracy: \"+str((Accuracy*100).round(4))+\"%\"\n # )\n # return \n TARGET = target\n # Multiple Charts for Evaluation\n fig, axes = plt.subplots(nrows=1, ncols=4,figsize=[20,5])\n ax1, ax2, ax3, ax4 = axes.flatten()\n fig.suptitle('Model Evaluation for '+str(name), size=16)\n\n # Statistics\n ax1.axis('off')\n \n\n # Function to return rounded numbers if the string is float, return\n # integers otherwise and return characters if not a number\n def round_if_float(content):\n try:\n val = float(content)\n except ValueError:\n return(content)\n else:\n if val.is_integer():\n return(int(content))\n else:\n return(round(float(content),4))\n\n for num, name in enumerate(statistics):\n ax1.text(0.01, \n (-num*0.06+0.94),\n \"{0}: {1}\".format(name,round_if_float(statistics[name])),\n ha='left', \n va='bottom', \n fontsize=12)\n \n # Produce Lift Chart\n ax2.set_title('Lift Chart')\n data = pred_local.sort_values(by=probability_col, ascending=False)\n data['row_id'] = range(0,0+len(data))\n data['decile'] = ( data['row_id'] / (len(data)/10) ).astype(int)\n lift = data.groupby('decile')[TARGET].agg(['count','sum'])\n lift.columns = ['count', TARGET]\n lift['decile'] = range(1,11)\n\n data_ideal = pred_local.sort_values(by=TARGET, ascending=False)\n data_ideal['row_id'] = range(0,0+len(data))\n data_ideal['decile'] = ( data_ideal['row_id'] / (len(data_ideal)/10) ).astype(int)\n lift_ideal = data_ideal.groupby('decile')[TARGET].agg(['count','sum'])\n lift_ideal.columns = ['count', 'IDEAL']\n lift['IDEAL']=lift_ideal['IDEAL']\n\n ax2.bar(lift['decile'],lift['IDEAL']/lift['count'],color='darkorange', label='Ideal')\n ax2.bar(lift['decile'],lift[TARGET]/lift['count'],color='blue', alpha=0.6, label='Model')\n ax2.axhline((lift[TARGET]/lift['count']).mean(), color='grey', linestyle='--', label='Avg TARGET')\n ax2.set_ylim(0,1.15)\n ax2.set_xlabel('Decile', size=13)\n ax2.set_ylabel('Percent of Actual Targets', size=13)\n # Print labels.\n for dec in lift['decile']:\n ax2.text(dec, lift[lift.decile==dec][TARGET]/lift[lift.decile==dec]['count'] + 0.05, \n (\"%.0f\" % int(round((lift[(lift.decile==dec)][TARGET]/lift[lift.decile==dec]['count'])*100,0)))+\"%\",\n ha='center', va='bottom')\n\n ax2.legend(loc=\"upper right\")\n\n # Limits and Labels for RAW Data\n\n\n # Produce Gains Chart\n ax3.set_title('Distributions of Predictions')\n ax3.hist(pred_local[pred_local[TARGET]==1][probability_col], density=True, bins=25, alpha=.5, color='blue', label='1')\n ax3.hist(pred_local[pred_local[TARGET]==0][probability_col], density=True, bins=25, alpha=.5, color='darkorange', label='0')\n ax3.axvline(.5, color='grey', linestyle='--', label='Cutoff at 0.5')\n ax3.set_xlim([0,1])\n ax3.set_xlabel('Probability of 1', size=13)\n ax3.set_ylabel('Density', size=13)\n ax3.legend(loc=\"upper right\")\n\n # ROC curve Chart\n ax4.set_title('ROC Curve')\n ax4.plot(fpr, tpr, color='blue', lw=2, label='ROC curve')\n ax4.plot([0, 1], [0, 1], lw=2, linestyle='--', color='grey', label='Random guess')\n ax4.annotate('AUC ='+str((AUC).round(4)), xy=(0.5, 0.4), xycoords='axes fraction', size=13)\n ax4.annotate('Precision ='+str((Precision).round(4)), xy=(0.45, 0.35), xycoords='axes fraction', size=13)\n ax4.annotate('Recall ='+str((TPR).round(4)), xy=(0.4, 0.3), xycoords='axes fraction', size=13)\n ax4.annotate('Accuracy ='+str((Accuracy).round(4)), xy=(0.35, 0.25), xycoords='axes fraction', size=13)\n ax4.annotate('F1 Score ='+str((F1Score).round(4)), xy=(0.3, 0.2), xycoords='axes fraction', size=13)\n ax4.set_xlim([-0.02, 1.02])\n ax4.set_ylim([0.0, 1.02])\n ax4.set_xlabel('False Positive Rate', size=13)\n ax4.set_ylabel('True Positive Rate', size=13)\n ax4.legend(loc=\"lower right\")\n","user":"JIE","dateUpdated":"2021-06-10T17:53:22+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"sql","editOnDblClick":false},"editorMode":"ace/mode/undefined","editorHide":true,"title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"interrupted":false,"jobName":"paragraph_1622836148862_345913533","id":"20210604-194908_1825093729","dateCreated":"2021-03-31T20:45:38+0000","dateStarted":"2021-04-07T16:49:36+0000","dateFinished":"2021-04-07T16:49:36+0000","status":"FINISHED","progressUpdateIntervalMs":500,"commited":true,"$$hashKey":"object:72"},{"title":"Plot the result","text":"%python\n\npred_plot(GLM_WOE_RES_DF[['BUY_INSURANCE','PREDICTION','PROBABILITY_OF_1']], 'PREDICTION', 'PROBABILITY_OF_1', 'BUY_INSURANCE', 'Generalized Linear Model', settings = '')\n","user":"JIE","dateUpdated":"2021-06-10T17:53:08+0000","config":{"colWidth":12,"fontSize":9,"enabled":false,"results":{},"editorSetting":{"language":"text","editOnDblClick":false},"editorMode":"ace/mode/undefined","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"