{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "1315bbf9-9949-4347-bae7-71a952d4fc7a", "metadata": { "tags": [] }, "outputs": [], "source": [ "import vaex\n", "\n", "# from datetime import datetime\n", "# One-time conversion of the raw CSV to HDF5 (kept for reference; uncomment to regenerate the .hdf5 file):\n", "# d_parser = lambda x: datetime.strptime(x, '%Y-%m-%d')\n", "# df= vaex.from_csv('train_data.csv', convert=True, chunk_size=100_000, parse_dates=['S_2'], \n", "# date_parser=d_parser)\n", "df=vaex.open('train_data.csv.hdf5')" ] }, { "cell_type": "code", "execution_count": 3, "id": "e1112f0e-0252-4563-bac9-c5d1058b9601", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(5531451, 190)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape # (number of rows, number of features)" ] }, { "cell_type": "code", "execution_count": 4, "id": "38c9bdbf-ea0f-4b40-ad33-bc4c2555ca67", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# customer_ID S_2 P_2 D_39 B_1 B_2 R_1 S_3 D_41 B_3 D_42 D_43 D_44 B_4 D_45 B_5 R_2 D_46 D_47 D_48 D_49 B_6 B_7 B_8 D_50 D_51 B_9 R_3 D_52 P_3 B_10 D_53 S_5 B_11 S_6 D_54 R_4 S_7 B_12 S_8 D_55 D_56 B_13 R_5 D_58 S_9 B_14 D_59 D_60 D_61 B_15 S_11 D_62D_63 D_64 D_65 B_16 B_17 B_18 B_19 D_66 B_20 D_68 S_12 R_6 S_13 B_21 D_69 B_22 D_70 D_71 D_72 S_15 B_23 D_73 P_4 D_74 D_75 D_76 B_24 R_7 D_77 B_25 B_26 D_78 D_79 R_8 R_9 S_16 D_80 R_10 R_11 B_27 D_81 D_82 S_17 R_12 B_28 R_13 D_83 R_14 R_15 D_84 R_16 B_29 B_30 S_18 D_86 D_87 R_17 R_18 D_88 B_31 S_19 R_19 B_32 S_20 R_20 R_21 B_33 D_89 R_22 R_23 D_91 D_92 D_93 D_94 R_24 R_25 D_96 S_22 S_23 S_24 S_25 S_26 D_102 D_103 D_104 D_105 D_106 D_107 B_36 B_37 R_26 R_27 B_38 D_108 D_109 D_110 D_111 B_39 D_112 B_40 S_27 D_113 D_114 D_115 D_116 D_117 D_118 D_119 D_120 D_121 D_122 D_123 D_124 D_125 D_126 D_127 D_128 D_129 B_41 B_42 D_130 D_131 D_132 D_133 R_28 D_134 D_135 D_136 D_137 D_138 D_139 D_140 D_141 D_142 D_143 D_144 D_145
0'0000099d6bd597052cdcda90ffabf56573fe9d7c79be5fb...2017-03-09 00:00:00.0000000000.9384690.001733340.008724451.006840.009227720.1240350.00877113 0.00470924 nan nan0.0006301350.08098630.7089060.1706 0.006204030.3585870.5253510.255736 nan0.06390220.05941570.006465580.1486981.335860.008206740.0014225 0.2073340.7364630.0962188 nan0.02338110.002768060.008321651.001520.008298440.1613450.1482660.9229980.3545960.1520250.1180750.001881790.1586120.06572840.01838460.06364650.1996170.3082330.01636060.4016190.091071 CR O 0.007126160.00766527 nan0.6529840.00852044 nan0.00472983 60.2720080.008362540.5152220.002644030.0090133 0.004807510.008341720.1194030.0048019 0.1082710.0508819 nan0.007554430.08042160.0690668 nan0.004326790.00756245 nan0.007728650.0002718280.001575740.004239360.00143399 nan0.002270940.00406052 0.007121090.00245606 0.0023103 0.003531980.5066120.00803302 1.009820.08468260.00381998 0.0070426 0.0004379550.006451630.000829520.00505487 nan 00.005720420.00708447 nan0.0001983080.00890741 nan 10.002537210.005177360.006626180.009705140.007781590.002449961.0011 0.002665330.007478760.006892811.503671.006130.00356854 0.00887059 0.003949730.003647140.004950030.89409 0.1355610.9111910.9745390.0012434 0.7666881.008691.004590.893734 nan0.6700410.009968480.00457161 nan1.00895 2 nan0.00432553 nan nan nan1.007340.21006 0.6769220.00787114 10.23825 0 40.23212 0.236266 00.70228 0.4343450.0030567 0.6865160.00873972 11.003321.007821.000080.00680497 nan0.002051690.00597188 nan0.004345060.00153473 nan nan nan nan nan0.002427040.003706270.00381782 nan0.000569240.0006098370.00267421
1'0000099d6bd597052cdcda90ffabf56573fe9d7c79be5fb...2017-04-07 00:00:00.0000000000.9366650.005775440.004923351.000650.006151310.12675 0.0007983590.00271358 nan nan0.00252627 0.06941920.7127950.1132390.006205670.35363 0.5213110.223329 nan0.06526110.05774380.001614010.1497231.339790.008373240.001984430.2027780.7208860.099804 nan0.03059860.002749360.002482071.009030.005136180.1409510.14353 0.9194140.3267570.1562010.1187370.001609960.1484590.09393540.01303480.06550140.1513870.2650260.01768790.4063260.0868048CR O 0.002413240.00714816 nan0.6470930.00223779 nan0.00387926 60.18897 0.004029570.5090480.004193120.007842380.001283160.006523810.1406119.36286e-050.1010180.0404689 nan0.004832170.08141320.0741664 nan0.004202760.00530352 nan0.001864130.0009788890.009895840.007597280.000509316 nan0.009810230.0001265090.005965810.0003953910.001326730.0077727 0.5008550.0007604421.009460.08184320.0003466250.007789350.00431088 0.0023325 0.009468790.00375319 nan 00.007584340.0066773 nan0.00114229 0.00590701 nan 10.0084272 0.008979160.001854110.009923780.005987440.002246821.006780.002507690.006827270.002837081.503581.005790.0005709010.0003907760.008351290.008849970.003180080.9021350.1363330.9198760.9756240.004561380.7860071.000081.004120.906841 nan0.6686470.003921 0.00465385 nan1.00321 2 nan0.00870721 nan nan nan1.007650.1840930.8222810.0034444 10.247217 0 40.2435320.241885 00.7070170.4305010.001305850.6864140.000755019 11.008391.004331.008340.00440716 nan0.001033560.00483756 nan0.007494780.00493136 nan nan nan nan nan0.003954210.003167090.00503163 nan0.009576480.00549205 0.00921683
" ], "text/plain": [ " # customer_ID S_2 P_2 D_39 B_1 B_2 R_1 S_3 D_41 B_3 D_42 D_43 D_44 B_4 D_45 B_5 R_2 D_46 D_47 D_48 D_49 B_6 B_7 B_8 D_50 D_51 B_9 R_3 D_52 P_3 B_10 D_53 S_5 B_11 S_6 D_54 R_4 S_7 B_12 S_8 D_55 D_56 B_13 R_5 D_58 S_9 B_14 D_59 D_60 D_61 B_15 S_11 D_62 D_63 D_64 D_65 B_16 B_17 B_18 B_19 D_66 B_20 D_68 S_12 R_6 S_13 B_21 D_69 B_22 D_70 D_71 D_72 S_15 B_23 D_73 P_4 D_74 D_75 D_76 B_24 R_7 D_77 B_25 B_26 D_78 D_79 R_8 R_9 S_16 D_80 R_10 R_11 B_27 D_81 D_82 S_17 R_12 B_28 R_13 D_83 R_14 R_15 D_84 R_16 B_29 B_30 S_18 D_86 D_87 R_17 R_18 D_88 B_31 S_19 R_19 B_32 S_20 R_20 R_21 B_33 D_89 R_22 R_23 D_91 D_92 D_93 D_94 R_24 R_25 D_96 S_22 S_23 S_24 S_25 S_26 D_102 D_103 D_104 D_105 D_106 D_107 B_36 B_37 R_26 R_27 B_38 D_108 D_109 D_110 D_111 B_39 D_112 B_40 S_27 D_113 D_114 D_115 D_116 D_117 D_118 D_119 D_120 D_121 D_122 D_123 D_124 D_125 D_126 D_127 D_128 D_129 B_41 B_42 D_130 D_131 D_132 D_133 R_28 D_134 D_135 D_136 D_137 D_138 D_139 D_140 D_141 D_142 D_143 D_144 D_145\n", " 0 '0000099d6bd597052cdcda90ffabf56573fe9d7c79be5fb... 
2017-03-09 00:00:00.000000000 0.938469 0.00173334 0.00872445 1.00684 0.00922772 0.124035 0.00877113 0.00470924 nan nan 0.000630135 0.0809863 0.708906 0.1706 0.00620403 0.358587 0.525351 0.255736 nan 0.0639022 0.0594157 0.00646558 0.148698 1.33586 0.00820674 0.0014225 0.207334 0.736463 0.0962188 nan 0.0233811 0.00276806 0.00832165 1.00152 0.00829844 0.161345 0.148266 0.922998 0.354596 0.152025 0.118075 0.00188179 0.158612 0.0657284 0.0183846 0.0636465 0.199617 0.308233 0.0163606 0.401619 0.091071 CR O 0.00712616 0.00766527 nan 0.652984 0.00852044 nan 0.00472983 6 0.272008 0.00836254 0.515222 0.00264403 0.0090133 0.00480751 0.00834172 0.119403 0.0048019 0.108271 0.0508819 nan 0.00755443 0.0804216 0.0690668 nan 0.00432679 0.00756245 nan 0.00772865 0.000271828 0.00157574 0.00423936 0.00143399 nan 0.00227094 0.00406052 0.00712109 0.00245606 0.0023103 0.00353198 0.506612 0.00803302 1.00982 0.0846826 0.00381998 0.0070426 0.000437955 0.00645163 0.00082952 0.00505487 nan 0 0.00572042 0.00708447 nan 0.000198308 0.00890741 nan 1 0.00253721 0.00517736 0.00662618 0.00970514 0.00778159 0.00244996 1.0011 0.00266533 0.00747876 0.00689281 1.50367 1.00613 0.00356854 0.00887059 0.00394973 0.00364714 0.00495003 0.89409 0.135561 0.911191 0.974539 0.0012434 0.766688 1.00869 1.00459 0.893734 nan 0.670041 0.00996848 0.00457161 nan 1.00895 2 nan 0.00432553 nan nan nan 1.00734 0.21006 0.676922 0.00787114 1 0.23825 0 4 0.23212 0.236266 0 0.70228 0.434345 0.0030567 0.686516 0.00873972 1 1.00332 1.00782 1.00008 0.00680497 nan 0.00205169 0.00597188 nan 0.00434506 0.00153473 nan nan nan nan nan 0.00242704 0.00370627 0.00381782 nan 0.00056924 0.000609837 0.00267421\n", " 1 '0000099d6bd597052cdcda90ffabf56573fe9d7c79be5fb... 
2017-04-07 00:00:00.000000000 0.936665 0.00577544 0.00492335 1.00065 0.00615131 0.12675 0.000798359 0.00271358 nan nan 0.00252627 0.0694192 0.712795 0.113239 0.00620567 0.35363 0.521311 0.223329 nan 0.0652611 0.0577438 0.00161401 0.149723 1.33979 0.00837324 0.00198443 0.202778 0.720886 0.099804 nan 0.0305986 0.00274936 0.00248207 1.00903 0.00513618 0.140951 0.14353 0.919414 0.326757 0.156201 0.118737 0.00160996 0.148459 0.0939354 0.0130348 0.0655014 0.151387 0.265026 0.0176879 0.406326 0.0868048 CR O 0.00241324 0.00714816 nan 0.647093 0.00223779 nan 0.00387926 6 0.18897 0.00402957 0.509048 0.00419312 0.00784238 0.00128316 0.00652381 0.140611 9.36286e-05 0.101018 0.0404689 nan 0.00483217 0.0814132 0.0741664 nan 0.00420276 0.00530352 nan 0.00186413 0.000978889 0.00989584 0.00759728 0.000509316 nan 0.00981023 0.000126509 0.00596581 0.000395391 0.00132673 0.0077727 0.500855 0.000760442 1.00946 0.0818432 0.000346625 0.00778935 0.00431088 0.0023325 0.00946879 0.00375319 nan 0 0.00758434 0.0066773 nan 0.00114229 0.00590701 nan 1 0.0084272 0.00897916 0.00185411 0.00992378 0.00598744 0.00224682 1.00678 0.00250769 0.00682727 0.00283708 1.50358 1.00579 0.000570901 0.000390776 0.00835129 0.00884997 0.00318008 0.902135 0.136333 0.919876 0.975624 0.00456138 0.786007 1.00008 1.00412 0.906841 nan 0.668647 0.003921 0.00465385 nan 1.00321 2 nan 0.00870721 nan nan nan 1.00765 0.184093 0.822281 0.0034444 1 0.247217 0 4 0.243532 0.241885 0 0.707017 0.430501 0.00130585 0.686414 0.000755019 1 1.00839 1.00433 1.00834 0.00440716 nan 0.00103356 0.00483756 nan 0.00749478 0.00493136 nan nan nan nan nan 0.00395421 0.00316709 0.00503163 nan 0.00957648 0.00549205 0.00921683" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(2)" ] }, { "cell_type": "code", "execution_count": 5, "id": "dc1da2d1-e538-473b-bbe7-abfa598e00a8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "There are 458913 unique customers in the 
dataset\n" ] } ], "source": [ "print(\"There are {} unique customers in the dataset\".format(df.customer_ID.nunique()))" ] },
{ "cell_type": "code", "execution_count": 6, "id": "b9fce4ab-9a89-4aba-9d6d-5c3eb0cb9726", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Expression = S_2\n", "Length: 5,531,451 dtype: datetime64[ns] (column)\n", "------------------------------------------------\n", " 0 2017-03-09 00:00:00.000000000\n", " 1 2017-04-07 00:00:00.000000000\n", " 2 2017-05-28 00:00:00.000000000\n", " 3 2017-06-13 00:00:00.000000000\n", " 4 2017-07-16 00:00:00.000000000\n", " ... \n", "5531446 2017-11-05 00:00:00.000000000\n", "5531447 2017-12-23 00:00:00.000000000\n", "5531448 2018-01-06 00:00:00.000000000\n", "5531449 2018-02-06 00:00:00.000000000\n", "5531450 2018-03-14 00:00:00.000000000" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.S_2" ] },
{ "cell_type": "code", "execution_count": 7, "id": "427b8705-756b-48be-8959-07aa85fe9520", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Date'" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.rename('S_2','Date')" ] },
{ "cell_type": "code", "execution_count": 8, "id": "c9a22093-a71d-4ad2-bdc8-010dc8793395", "metadata": {}, "outputs": [], "source": [ "# Missing-value count per column; used further down to keep only fully populated features.\n", "# int(...) turns the aggregation result into a plain Python int (one reduction is enough).\n", "cols=list(df.columns)\n", "nulls={col: int(df[col].isna().sum()) for col in cols}" ] },
{ "cell_type": "code", "execution_count": 9, "id": "86f509d4-c993-440f-aee4-3d630bd251e5", "metadata": { "tags": [] }, "outputs": [], "source": [ "# dict(sorted(nulls.items(), key=lambda item: item[1]))" ] },
{ "cell_type": "code", "execution_count": 10, "id": "2c1072e4-609a-4327-bf2b-97a2f0b1d2d8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array('2017-03-01T00:00:00.000000000', dtype='datetime64[ns]')" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.Date.min()" ] },
{ "cell_type": "code", "execution_count": 11, "id": "cd7ced19-3772-42d9-9dac-277d47ac5b80", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array('2018-03-31T00:00:00.000000000', dtype='datetime64[ns]')" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.Date.max()" ] },
{ "cell_type": "code", "execution_count": 12, "id": "f093e0d8-3fc9-4ecb-9f02-275ccdc12519", "metadata": {}, "outputs": [], "source": [ "# Columns treated as categorical; they are excluded from the numeric-only baseline below.\n", "cat_cols=['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68']" ] },
{ "cell_type": "markdown", "id": "87a4e3e1-3a7d-4cd6-8866-25b5bf9abdf7", "metadata": {}, "source": [ "### Simple Model 1: Use all numerical features without null values" ] },
{ "cell_type": "code", "execution_count": 13, "id": "ce2bf1ab-a93f-4fb3-bbda-ec8fb410337c", "metadata": {}, "outputs": [], "source": [ "# Keep every column that has no missing values and is not in cat_cols.\n", "cols_use=[col for col in cols if nulls[col]==0 and col not in cat_cols]" ] },
{ "cell_type": "code", "execution_count": 14, "id": "76e1c81a-848d-4aaf-8ff7-4cd1500f1fd1", "metadata": {}, "outputs": [], "source": [ "# One row per customer: mean of each selected column (result columns are named <col>_mean).\n", "df_new=df[cols_use].groupby(df.customer_ID,agg='mean')" ] },
{ "cell_type": "code", "execution_count": 15, "id": "3de5ff09-d45d-469b-a105-280368cfd647", "metadata": {}, "outputs": [], "source": [ "# The averaged statement date is not used as a feature.\n", "# drop() returns the resulting frame, so no inplace flag is needed with the reassignment.\n", "df_new=df_new.drop('Date_mean')" ] },
{ "cell_type": "code", "execution_count": 16, "id": "9fe869c2-51d4-447a-94d8-05cf15468722", "metadata": {}, "outputs": [], "source": [ "# From here on cols_use holds the aggregated feature names; customer_ID is the key, not a feature.\n", "cols_use=list(df_new.columns)\n", "cols_use.remove('customer_ID')" ] },
{ "cell_type": "code", "execution_count": 17, "id": "025d0519-de44-4c8e-9ee5-aa285f184d5d", "metadata": {}, "outputs": [], "source": [ "# target= vaex.from_csv('train_labels.csv', convert=True)\n", "target=vaex.open('train_labels.csv.hdf5')" ] },
{ "cell_type": "code", "execution_count": 18, "id": "63ed150e-cf61-4fff-adec-9b84251b14f4", "metadata": {}, "outputs": [], "source": [ "# Attach the label columns from the labels frame, matching rows on customer_ID.\n", "df_new=df_new.join(target,on='customer_ID')" ] },
{ "cell_type": "code", "execution_count": 19, "id": "c451efcf-1f92-4be4-9b3b-a136f30c536d", "metadata": {}, "outputs": [], "source": [ "# 70/30 train/validation split with a fixed seed.\n", "df_train,df_valid=df_new.split_random([0.7,0.3],random_state=2)" ] },
{ "cell_type": "code", "execution_count": 20, "id": "b8fa31c7-ed77-4fa1-be40-88a8ee39d0bc", "metadata": {}, "outputs": [], "source": [ "import vaex.ml.tensorflow" ] },
{ "cell_type": "code", "execution_count": 21, "id": "c57af70d-b057-4d09-92ec-902209442628", "metadata": {}, "outputs": [], "source": [ "import math\n", "\n", "from tensorflow.keras import layers\n", "from tensorflow import keras" ] },
{ "cell_type": "code", "execution_count": 22, "id": "7434d967-d105-4593-a002-0f54f35558fb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Recommended \"steps_per_epoch\" arg: 628.0\n", "Recommended \"steps_per_epoch\" arg: 269.0\n" ] } ], "source": [ "BATCH_SIZE = 512\n", "# The generator expects the label as a column NAME (or expression) of the frame it is built from.\n", "# Passing the labels DataFrame itself raises ValueError on the first batch.\n", "TARGET_COL = 'target'\n", "gen_train = df_train.ml.tensorflow.to_keras_generator(features=cols_use, target=TARGET_COL, batch_size=BATCH_SIZE)\n", "gen_valid = df_valid.ml.tensorflow.to_keras_generator(features=cols_use, target=TARGET_COL, batch_size=BATCH_SIZE)" ] },
{ "cell_type": "code", "execution_count": 27, "id": "f7fc70f5-1043-4c46-9cd1-eb40516cf19c", "metadata": {}, "outputs": [], "source": [ "# Stop after 10 epochs without an improvement of at least 0.001 and roll back to the best weights.\n", "early_stopping = keras.callbacks.EarlyStopping(\n", " patience=10,\n", " min_delta=0.001,\n", " restore_best_weights=True)" ] },
{ "cell_type": "code", "execution_count": 28, "id": "fc361225-9864-4900-b189-ce0f654601a4", "metadata": {}, "outputs": [], "source": [ "# Small MLP: two Dense -> BatchNorm -> Dropout blocks, then a sigmoid unit for the binary label.\n", "nn_model = keras.Sequential()\n", "# Input width follows the feature list instead of a hard-coded count.\n", "nn_model.add(layers.Dense(64, activation='relu', input_shape=[len(cols_use)]))\n", "nn_model.add(layers.BatchNormalization())\n", "nn_model.add(layers.Dropout(rate=0.3))\n", "nn_model.add(layers.Dense(8, activation='relu'))\n", "nn_model.add(layers.BatchNormalization())\n", "nn_model.add(layers.Dropout(rate=0.3))\n", "nn_model.add(layers.Dense(1,activation='sigmoid'))\n", "\n", "nn_model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['binary_accuracy'])" ] },
{ "cell_type": "code", "execution_count": null, "id": "1766d764-f6db-4ad3-a038-a3e944390f71", "metadata": {}, "outputs": [], "source": [ "# Steps per epoch are derived from the split sizes so they stay in sync with BATCH_SIZE and the data\n", "# (ceil(rows / batch) gives 628 and 269 for the current split).\n", "nn_model.fit(x=gen_train, validation_data=gen_valid, epochs=100,\n", "             steps_per_epoch=math.ceil(len(df_train)/BATCH_SIZE),\n", "             validation_steps=math.ceil(len(df_valid)/BATCH_SIZE),\n", "             callbacks=[early_stopping])" ] },
{ "cell_type": "code", "execution_count": null, "id": "91bfcdd2-c803-425b-8a13-700030b8d6cd", "metadata": {}, "outputs": [], "source": [] } ],
"metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }