{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "1315bbf9-9949-4347-bae7-71a952d4fc7a", "metadata": { "tags": [] }, "outputs": [], "source": [ "import os\n", "from datetime import datetime\n", "\n", "import vaex\n", "\n", "CSV_PATH = 'train_data.csv'\n", "# convert=True is expected to write the HDF5 copy next to the CSV as\n", "# '<csv>.hdf5' -- TODO confirm against the installed vaex version.\n", "HDF5_PATH = 'train_data.csv.hdf5'\n", "\n", "# One-time CSV -> HDF5 conversion. It is slow, so it only runs when the\n", "# converted file is missing; with the file present this cell just opens it.\n", "if not os.path.exists(HDF5_PATH):\n", "    vaex.from_csv(\n", "        CSV_PATH,\n", "        convert=True,\n", "        chunk_size=100_000,\n", "        parse_dates=['S_2'],\n", "        date_parser=lambda x: datetime.strptime(x, '%Y-%m-%d'),\n", "    )\n", "\n", "df = vaex.open(HDF5_PATH)" ] }, { "cell_type": "code", "execution_count": 3, "id": "e1112f0e-0252-4563-bac9-c5d1058b9601", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(5531451, 190)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape  # (number of rows, number of columns)" ] }, { "cell_type": "code", "execution_count": 4, "id": "38c9bdbf-ea0f-4b40-ad33-bc4c2555ca67", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
# | customer_ID | S_2 | P_2 | D_39 | B_1 | B_2 | R_1 | S_3 | D_41 | B_3 | D_42 | D_43 | D_44 | B_4 | D_45 | B_5 | R_2 | D_46 | D_47 | D_48 | D_49 | B_6 | B_7 | B_8 | D_50 | D_51 | B_9 | R_3 | D_52 | P_3 | B_10 | D_53 | S_5 | B_11 | S_6 | D_54 | R_4 | S_7 | B_12 | S_8 | D_55 | D_56 | B_13 | R_5 | D_58 | S_9 | B_14 | D_59 | D_60 | D_61 | B_15 | S_11 | D_62 | D_63 | D_64 | D_65 | B_16 | B_17 | B_18 | B_19 | D_66 | B_20 | D_68 | S_12 | R_6 | S_13 | B_21 | D_69 | B_22 | D_70 | D_71 | D_72 | S_15 | B_23 | D_73 | P_4 | D_74 | D_75 | D_76 | B_24 | R_7 | D_77 | B_25 | B_26 | D_78 | D_79 | R_8 | R_9 | S_16 | D_80 | R_10 | R_11 | B_27 | D_81 | D_82 | S_17 | R_12 | B_28 | R_13 | D_83 | R_14 | R_15 | D_84 | R_16 | B_29 | B_30 | S_18 | D_86 | D_87 | R_17 | R_18 | D_88 | B_31 | S_19 | R_19 | B_32 | S_20 | R_20 | R_21 | B_33 | D_89 | R_22 | R_23 | D_91 | D_92 | D_93 | D_94 | R_24 | R_25 | D_96 | S_22 | S_23 | S_24 | S_25 | S_26 | D_102 | D_103 | D_104 | D_105 | D_106 | D_107 | B_36 | B_37 | R_26 | R_27 | B_38 | D_108 | D_109 | D_110 | D_111 | B_39 | D_112 | B_40 | S_27 | D_113 | D_114 | D_115 | D_116 | D_117 | D_118 | D_119 | D_120 | D_121 | D_122 | D_123 | D_124 | D_125 | D_126 | D_127 | D_128 | D_129 | B_41 | B_42 | D_130 | D_131 | D_132 | D_133 | R_28 | D_134 | D_135 | D_136 | D_137 | D_138 | D_139 | D_140 | D_141 | D_142 | D_143 | D_144 | D_145 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | '0000099d6bd597052cdcda90ffabf56573fe9d7c79be5fb... | 2017-03-09 00:00:00.000000000 | 0.938469 | 0.00173334 | 0.00872445 | 1.00684 | 0.00922772 | 0.124035 | 0.00877113 | 0.00470924 | nan | nan | 0.000630135 | 0.0809863 | 0.708906 | 0.1706 | 0.00620403 | 0.358587 | 0.525351 | 0.255736 | nan | 0.0639022 | 0.0594157 | 0.00646558 | 0.148698 | 1.33586 | 0.00820674 | 0.0014225 | 0.207334 | 0.736463 | 0.0962188 | nan | 0.0233811 | 0.00276806 | 0.00832165 | 1.00152 | 0.00829844 | 0.161345 | 0.148266 | 0.922998 | 0.354596 | 0.152025 | 0.118075 | 0.00188179 | 0.158612 | 0.0657284 | 0.0183846 | 0.0636465 | 0.199617 | 0.308233 | 0.0163606 | 0.401619 | 0.091071 | CR | O | 0.00712616 | 0.00766527 | nan | 0.652984 | 0.00852044 | nan | 0.00472983 | 6 | 0.272008 | 0.00836254 | 0.515222 | 0.00264403 | 0.0090133 | 0.00480751 | 0.00834172 | 0.119403 | 0.0048019 | 0.108271 | 0.0508819 | nan | 0.00755443 | 0.0804216 | 0.0690668 | nan | 0.00432679 | 0.00756245 | nan | 0.00772865 | 0.000271828 | 0.00157574 | 0.00423936 | 0.00143399 | nan | 0.00227094 | 0.00406052 | 0.00712109 | 0.00245606 | 0.0023103 | 0.00353198 | 0.506612 | 0.00803302 | 1.00982 | 0.0846826 | 0.00381998 | 0.0070426 | 0.000437955 | 0.00645163 | 0.00082952 | 0.00505487 | nan | 0 | 0.00572042 | 0.00708447 | nan | 0.000198308 | 0.00890741 | nan | 1 | 0.00253721 | 0.00517736 | 0.00662618 | 0.00970514 | 0.00778159 | 0.00244996 | 1.0011 | 0.00266533 | 0.00747876 | 0.00689281 | 1.50367 | 1.00613 | 0.00356854 | 0.00887059 | 0.00394973 | 0.00364714 | 0.00495003 | 0.89409 | 0.135561 | 0.911191 | 0.974539 | 0.0012434 | 0.766688 | 1.00869 | 1.00459 | 0.893734 | nan | 0.670041 | 0.00996848 | 0.00457161 | nan | 1.00895 | 2 | nan | 0.00432553 | nan | nan | nan | 1.00734 | 0.21006 | 0.676922 | 0.00787114 | 1 | 0.23825 | 0 | 4 | 0.23212 | 0.236266 | 0 | 0.70228 | 0.434345 | 0.0030567 | 0.686516 | 0.00873972 | 1 | 1.00332 | 1.00782 | 1.00008 | 0.00680497 | nan | 0.00205169 | 0.00597188 | nan | 0.00434506 | 0.00153473 | nan | nan | nan 
| nan | nan | 0.00242704 | 0.00370627 | 0.00381782 | nan | 0.00056924 | 0.000609837 | 0.00267421 |
1 | '0000099d6bd597052cdcda90ffabf56573fe9d7c79be5fb... | 2017-04-07 00:00:00.000000000 | 0.936665 | 0.00577544 | 0.00492335 | 1.00065 | 0.00615131 | 0.12675 | 0.000798359 | 0.00271358 | nan | nan | 0.00252627 | 0.0694192 | 0.712795 | 0.113239 | 0.00620567 | 0.35363 | 0.521311 | 0.223329 | nan | 0.0652611 | 0.0577438 | 0.00161401 | 0.149723 | 1.33979 | 0.00837324 | 0.00198443 | 0.202778 | 0.720886 | 0.099804 | nan | 0.0305986 | 0.00274936 | 0.00248207 | 1.00903 | 0.00513618 | 0.140951 | 0.14353 | 0.919414 | 0.326757 | 0.156201 | 0.118737 | 0.00160996 | 0.148459 | 0.0939354 | 0.0130348 | 0.0655014 | 0.151387 | 0.265026 | 0.0176879 | 0.406326 | 0.0868048 | CR | O | 0.00241324 | 0.00714816 | nan | 0.647093 | 0.00223779 | nan | 0.00387926 | 6 | 0.18897 | 0.00402957 | 0.509048 | 0.00419312 | 0.00784238 | 0.00128316 | 0.00652381 | 0.140611 | 9.36286e-05 | 0.101018 | 0.0404689 | nan | 0.00483217 | 0.0814132 | 0.0741664 | nan | 0.00420276 | 0.00530352 | nan | 0.00186413 | 0.000978889 | 0.00989584 | 0.00759728 | 0.000509316 | nan | 0.00981023 | 0.000126509 | 0.00596581 | 0.000395391 | 0.00132673 | 0.0077727 | 0.500855 | 0.000760442 | 1.00946 | 0.0818432 | 0.000346625 | 0.00778935 | 0.00431088 | 0.0023325 | 0.00946879 | 0.00375319 | nan | 0 | 0.00758434 | 0.0066773 | nan | 0.00114229 | 0.00590701 | nan | 1 | 0.0084272 | 0.00897916 | 0.00185411 | 0.00992378 | 0.00598744 | 0.00224682 | 1.00678 | 0.00250769 | 0.00682727 | 0.00283708 | 1.50358 | 1.00579 | 0.000570901 | 0.000390776 | 0.00835129 | 0.00884997 | 0.00318008 | 0.902135 | 0.136333 | 0.919876 | 0.975624 | 0.00456138 | 0.786007 | 1.00008 | 1.00412 | 0.906841 | nan | 0.668647 | 0.003921 | 0.00465385 | nan | 1.00321 | 2 | nan | 0.00870721 | nan | nan | nan | 1.00765 | 0.184093 | 0.822281 | 0.0034444 | 1 | 0.247217 | 0 | 4 | 0.243532 | 0.241885 | 0 | 0.707017 | 0.430501 | 0.00130585 | 0.686414 | 0.000755019 | 1 | 1.00839 | 1.00433 | 1.00834 | 0.00440716 | nan | 0.00103356 | 0.00483756 | nan | 0.00749478 | 0.00493136 | nan | 
nan | nan | nan | nan | 0.00395421 | 0.00316709 | 0.00503163 | nan | 0.00957648 | 0.00549205 | 0.00921683 |