{ "cells": [ { "metadata": { "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "trusted": true }, "cell_type": "code", "source": "# Kaggle Python 3 image (https://github.com/kaggle/docker-python): the usual\n# analytics stack is preinstalled, so only the imports below are needed.\nimport os\n\nimport numpy as np  # linear algebra\nimport pandas as pd  # CSV loading and tabular data handling\n\n# Input files live under \"../input/\"; list them to confirm the dataset is attached.\n# Anything written to the current directory is saved as notebook output.\nprint(os.listdir(\"../input\"))", "execution_count": 1, "outputs": [ { "output_type": "stream", "text": "['creditcard.csv']\n", "name": "stdout" } ] },
{ "metadata": { "_uuid": "04bcbe2c5f6b3f74f6f8732c41e8e8e15654d311", "_cell_guid": "1fa9c421-e3e1-4a2c-978f-28f78e01ab34", "trusted": true }, "cell_type": "code", "source": "import tensorflow as tf\n\n# Fix the NumPy and TensorFlow (1.x API) global seeds before any random work is done.\nnp.random.seed(1)\ntf.set_random_seed(2)", "execution_count": 2, "outputs": [] },
{ "metadata": { "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", "trusted": true }, "cell_type": "code", "source": "# Raw credit-card transactions; the Class column is the fraud label.\ndf = pd.read_csv('../input/creditcard.csv')", "execution_count": 3, "outputs": [] },
{ "metadata": { "_uuid": "24bc772bdd624e7d844b74020e273ffe3def4246", "_cell_guid": "ad3d861f-12e4-49ca-a221-6d8bd6be6f8a", "trusted": true }, "cell_type": "code", "source": "df.head()", "execution_count": 4, "outputs": [ { "output_type": "execute_result", "execution_count": 4, "data": { "text/plain": " Time V1 V2 V3 ... V27 V28 Amount Class\n0 0.0 -1.359807 -0.072781 2.536347 ... 0.133558 -0.021053 149.62 0\n1 0.0 1.191857 0.266151 0.166480 ... 
-0.008983 0.014724 2.69 0\n2 1.0 -1.358354 -1.340163 1.773209 ... -0.055353 -0.059752 378.66 0\n3 1.0 -0.966272 -0.185226 1.792993 ... 0.062723 0.061458 123.50 0\n4 2.0 -1.158233 0.877737 1.548718 ... 0.219422 0.215153 69.99 0\n\n[5 rows x 31 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.3637870.090794-0.551600-0.617801-0.991390-0.3111691.468177-0.4704010.2079710.0257910.4039930.251412-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425-0.1669741.6127271.0652350.489095-0.1437720.6355580.463917-0.114805-0.183361-0.145783-0.069083-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.5146540.2076430.6245010.0660840.717293-0.1659462.345865-2.8900831.109969-0.121359-2.2618570.5249800.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024-0.054952-0.2264870.1782280.507757-0.287924-0.631418-1.059647-0.6840931.965775-1.232622-0.208038-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.8177390.753074-0.8228430.5381961.345852-1.1196700.175121-0.451449-0.237033-0.0381950.8034870.408542-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
\n
" }, "metadata": {} } ] }, { "metadata": { "_uuid": "7f9f454f8341834b0c6ab1a3de99b2577dcd7dd0", "_cell_guid": "ac5f5f90-1607-4c5e-b0e7-845c724c7521", "trusted": true }, "cell_type": "code", "source": "df.describe()", "execution_count": 5, "outputs": [ { "output_type": "execute_result", "execution_count": 5, "data": { "text/plain": " Time V1 ... Amount Class\ncount 284807.000000 2.848070e+05 ... 284807.000000 284807.000000\nmean 94813.859575 3.919560e-15 ... 88.349619 0.001727\nstd 47488.145955 1.958696e+00 ... 250.120109 0.041527\nmin 0.000000 -5.640751e+01 ... 0.000000 0.000000\n25% 54201.500000 -9.203734e-01 ... 5.600000 0.000000\n50% 84692.000000 1.810880e-02 ... 22.000000 0.000000\n75% 139320.500000 1.315642e+00 ... 77.165000 0.000000\nmax 172792.000000 2.454930e+00 ... 25691.160000 1.000000\n\n[8 rows x 31 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
count284807.0000002.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+05284807.000000284807.000000
mean94813.8595753.919560e-155.688174e-16-8.769071e-152.782312e-15-1.552563e-152.010663e-15-1.694249e-15-1.927028e-16-3.137024e-151.768627e-159.170318e-16-1.810658e-151.693438e-151.479045e-153.482336e-151.392007e-15-7.528491e-164.328772e-169.049732e-165.085503e-161.537294e-167.959909e-165.367590e-164.458112e-151.453003e-151.699104e-15-3.660161e-16-1.206049e-1688.3496190.001727
std47488.1459551.958696e+001.651309e+001.516255e+001.415869e+001.380247e+001.332271e+001.237094e+001.194353e+001.098632e+001.088850e+001.020713e+009.992014e-019.952742e-019.585956e-019.153160e-018.762529e-018.493371e-018.381762e-018.140405e-017.709250e-017.345240e-017.257016e-016.244603e-016.056471e-015.212781e-014.822270e-014.036325e-013.300833e-01250.1201090.041527
min0.000000-5.640751e+01-7.271573e+01-4.832559e+01-5.683171e+00-1.137433e+02-2.616051e+01-4.355724e+01-7.321672e+01-1.343407e+01-2.458826e+01-4.797473e+00-1.868371e+01-5.791881e+00-1.921433e+01-4.498945e+00-1.412985e+01-2.516280e+01-9.498746e+00-7.213527e+00-5.449772e+01-3.483038e+01-1.093314e+01-4.480774e+01-2.836627e+00-1.029540e+01-2.604551e+00-2.256568e+01-1.543008e+010.0000000.000000
25%54201.500000-9.203734e-01-5.985499e-01-8.903648e-01-8.486401e-01-6.915971e-01-7.682956e-01-5.540759e-01-2.086297e-01-6.430976e-01-5.354257e-01-7.624942e-01-4.055715e-01-6.485393e-01-4.255740e-01-5.828843e-01-4.680368e-01-4.837483e-01-4.988498e-01-4.562989e-01-2.117214e-01-2.283949e-01-5.423504e-01-1.618463e-01-3.545861e-01-3.171451e-01-3.269839e-01-7.083953e-02-5.295979e-025.6000000.000000
50%84692.0000001.810880e-026.548556e-021.798463e-01-1.984653e-02-5.433583e-02-2.741871e-014.010308e-022.235804e-02-5.142873e-02-9.291738e-02-3.275735e-021.400326e-01-1.356806e-025.060132e-024.807155e-026.641332e-02-6.567575e-02-3.636312e-033.734823e-03-6.248109e-02-2.945017e-026.781943e-03-1.119293e-024.097606e-021.659350e-02-5.213911e-021.342146e-031.124383e-0222.0000000.000000
75%139320.5000001.315642e+008.037239e-011.027196e+007.433413e-016.119264e-013.985649e-015.704361e-013.273459e-015.971390e-014.539234e-017.395934e-016.182380e-016.625050e-014.931498e-016.488208e-015.232963e-013.996750e-015.008067e-014.589494e-011.330408e-011.863772e-015.285536e-011.476421e-014.395266e-013.507156e-012.409522e-019.104512e-027.827995e-0277.1650000.000000
max172792.0000002.454930e+002.205773e+019.382558e+001.687534e+013.480167e+017.330163e+011.205895e+022.000721e+011.559499e+012.374514e+011.201891e+017.848392e+007.126883e+001.052677e+018.877742e+001.731511e+019.253526e+005.041069e+005.591971e+003.942090e+012.720284e+011.050309e+012.252841e+014.584549e+007.519589e+003.517346e+003.161220e+013.384781e+0125691.1600001.000000
\n
" }, "metadata": {} } ] }, { "metadata": { "_uuid": "ae27e9aac252b8abebe73a7f152e8285f6ae6671", "_cell_guid": "8f0c6894-0f3c-4f1a-9b2e-5155cc434ab8", "trusted": true }, "cell_type": "code", "source": "df.isnull().sum()", "execution_count": 6, "outputs": [ { "output_type": "execute_result", "execution_count": 6, "data": { "text/plain": "Time 0\nV1 0\nV2 0\nV3 0\nV4 0\nV5 0\nV6 0\nV7 0\nV8 0\nV9 0\nV10 0\nV11 0\nV12 0\nV13 0\nV14 0\nV15 0\nV16 0\nV17 0\nV18 0\nV19 0\nV20 0\nV21 0\nV22 0\nV23 0\nV24 0\nV25 0\nV26 0\nV27 0\nV28 0\nAmount 0\nClass 0\ndtype: int64" }, "metadata": {} } ] }, { "metadata": { "_uuid": "7ba46b55bc860137cbde1b97572a918dec5970d3", "_cell_guid": "40293e6e-ec35-4847-90f1-1e6e89f70b91", "trusted": true }, "cell_type": "code", "source": "df = df.drop('Time',axis=1)", "execution_count": 7, "outputs": [] }, { "metadata": { "_uuid": "8ab18d55eacc48d3f578c2c1977a789f9b95e03e", "_cell_guid": "25ea497f-93a3-4818-b1e5-a40c9ae9b81d", "trusted": true }, "cell_type": "code", "source": "X = df.drop('Class',axis=1).values \ny = df['Class'].values", "execution_count": 8, "outputs": [] }, { "metadata": { "_uuid": "4e153e0a8bbd54231af019089db7ceac7d4a3ab2", "_cell_guid": "c4737b0e-f780-45b4-b1cc-c0bd1d419b41", "trusted": true }, "cell_type": "code", "source": "X.shape", "execution_count": 9, "outputs": [ { "output_type": "execute_result", "execution_count": 9, "data": { "text/plain": "(284807, 29)" }, "metadata": {} } ] }, { "metadata": { "_uuid": "66ce9da4edfea3e8b6619d5f543b365899a59a5e", "_cell_guid": "5788dbeb-8aa2-42a5-99af-b4e367de3808", "trusted": true }, "cell_type": "code", "source": "X -= X.min(axis=0)\nX /= X.max(axis=0)", "execution_count": 10, "outputs": [] }, { "metadata": { "_uuid": "c36820c67500d54458d9b22ebc2293f2e8ccf99f", "_cell_guid": "57b2f8a6-9c45-4f60-a0ff-8aef07b2f484", "trusted": true }, "cell_type": "code", "source": "X.mean()", "execution_count": 11, "outputs": [ { "output_type": "execute_result", "execution_count": 11, "data": { 
"text/plain": "0.5213456986251124" }, "metadata": {} } ] }, { "metadata": { "_uuid": "91d77fc484400c0bc3ba4c3b16ebd9873d3da966", "_cell_guid": "f79cfb3e-0a02-4052-b057-dfd6b96ac026", "trusted": true }, "cell_type": "code", "source": "X.shape", "execution_count": 12, "outputs": [ { "output_type": "execute_result", "execution_count": 12, "data": { "text/plain": "(284807, 29)" }, "metadata": {} } ] }, { "metadata": { "_uuid": "156872c244cdf82a28daa404fe1ebaaa96c52d0d", "_cell_guid": "78e7cc64-e345-45c4-8c4e-52aa50cb9c21", "trusted": true }, "cell_type": "code", "source": "from sklearn.model_selection import train_test_split\nX_train, X_test, y_train,y_test = train_test_split(X,y,test_size=0.1)", "execution_count": 13, "outputs": [] }, { "metadata": { "_uuid": "958d9f0b9143a37842c6510696173e357817397d", "_cell_guid": "26067623-40ff-44d1-9fbf-1736f9d5a967", "trusted": true }, "cell_type": "code", "source": "from keras.models import Model\nfrom keras.layers import Input, Dense", "execution_count": 14, "outputs": [ { "output_type": "stream", "text": "Using TensorFlow backend.\n", "name": "stderr" } ] }, { "metadata": { "_uuid": "0e6386095559dafe1c3f4660149894591ea6d0ff", "_cell_guid": "cf479c9f-e2e6-43c2-be27-852b70f2796f", "trusted": true }, "cell_type": "code", "source": "data_in = Input(shape=(29,))\nencoded = Dense(12,activation='tanh')(data_in)\ndecoded = Dense(29,activation='sigmoid')(encoded)\nautoencoder = Model(data_in,decoded)", "execution_count": 15, "outputs": [] }, { "metadata": { "_uuid": "b350c49f16744ceff5a0545b44915ca4a85cbfae", "_cell_guid": "92225494-1294-479f-8003-fb5db2c6db2d", "trusted": true }, "cell_type": "code", "source": "autoencoder.compile(optimizer='adam',loss='mean_squared_error')", "execution_count": 16, "outputs": [] }, { "metadata": { "_uuid": "9cff9216e21c05d9f7fc2e05a426ed065deadabf", "_cell_guid": "a4a83280-5afe-413f-89fa-86d67e3a3adb", "trusted": true }, "cell_type": "code", "source": "autoencoder.fit(X_train,\n X_train,\n epochs = 
20, \n batch_size=128, \n validation_data=(X_test,X_test))", "execution_count": 17, "outputs": [ { "output_type": "stream", "text": "Train on 256326 samples, validate on 28481 samples\nEpoch 1/20\n256326/256326 [==============================] - 8s 29us/step - loss: 0.0027 - val_loss: 0.0015\nEpoch 2/20\n256326/256326 [==============================] - 7s 26us/step - loss: 0.0012 - val_loss: 7.5711e-04\nEpoch 3/20\n256326/256326 [==============================] - 7s 26us/step - loss: 5.9954e-04 - val_loss: 5.0860e-04\nEpoch 4/20\n256326/256326 [==============================] - 7s 26us/step - loss: 4.3634e-04 - val_loss: 3.8141e-04\nEpoch 5/20\n256326/256326 [==============================] - 7s 26us/step - loss: 3.4394e-04 - val_loss: 3.1408e-04\nEpoch 6/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.9318e-04 - val_loss: 2.8245e-04\nEpoch 7/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.6792e-04 - val_loss: 2.5355e-04\nEpoch 8/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.3752e-04 - val_loss: 2.2938e-04\nEpoch 9/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.2411e-04 - val_loss: 2.2209e-04\nEpoch 10/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.1863e-04 - val_loss: 2.1798e-04\nEpoch 11/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.1538e-04 - val_loss: 2.1600e-04\nEpoch 12/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.1341e-04 - val_loss: 2.1404e-04\nEpoch 13/20\n256326/256326 [==============================] - 7s 27us/step - loss: 2.1226e-04 - val_loss: 2.1334e-04\nEpoch 14/20\n256326/256326 [==============================] - 7s 27us/step - loss: 2.1146e-04 - val_loss: 2.1264e-04\nEpoch 15/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.1092e-04 - val_loss: 2.1200e-04\nEpoch 16/20\n256326/256326 [==============================] - 7s 
26us/step - loss: 2.1046e-04 - val_loss: 2.1197e-04\nEpoch 17/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.1005e-04 - val_loss: 2.1131e-04\nEpoch 18/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.0971e-04 - val_loss: 2.1132e-04\nEpoch 19/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.0945e-04 - val_loss: 2.1161e-04\nEpoch 20/20\n256326/256326 [==============================] - 7s 26us/step - loss: 2.0920e-04 - val_loss: 2.1059e-04\n", "name": "stdout" }, { "output_type": "execute_result", "execution_count": 17, "data": { "text/plain": "" }, "metadata": {} } ] },
{ "metadata": { "_uuid": "44a06be6edd99f083d7107919ca2e84f06f661a7", "_cell_guid": "1496244f-fa4c-430a-924b-3414ef1a015d", "trusted": true }, "cell_type": "code", "source": "X_test.mean()", "execution_count": 18, "outputs": [ { "output_type": "execute_result", "execution_count": 18, "data": { "text/plain": "0.5214011426360144" }, "metadata": {} } ] },
{ "metadata": { "_uuid": "47e3a4a5ef8a3def84ada50a8db94112f7e0f746", "_cell_guid": "67f0ac0d-50ab-4e72-b594-f73e7f978fbb", "trusted": true }, "cell_type": "code", "source": "# Autoencoder reconstructions of the first ten held-out rows.\npred = autoencoder.predict(X_test[0:10])", "execution_count": 19, "outputs": [] },
{ "metadata": { "_uuid": "75b98afa5e92031738415f3650f32e0f91a5a4b8", "_cell_guid": "76d93c84-1419-4a17-b8ce-130c1faa049b", "trusted": true }, "cell_type": "code", "source": "import matplotlib.pyplot as plt\nimport numpy as np\n\nwidth = 0.8\n\nprediction = pred[9]\ntrue_value = X_test[9]\n\nindices = np.arange(len(prediction))\n\n# Columns left in X once Time and Class are dropped: V1..V28, then Amount.\nfeature_names = ['V{}'.format(i) for i in range(1, 29)] + ['Amount']\nassert len(feature_names) == len(prediction), 'one tick label per feature'\n\nfig, ax = plt.subplots(figsize=(10,7))\n\n# Wide blue bar = reconstruction, narrow red bar = input. Both are centred on\n# the same x so they overlap on the tick that names the feature.\nax.bar(indices, prediction, width=width, align='center',\n       color='b', label='Predicted Value')\nax.bar(indices, true_value, width=0.5*width, align='center',\n       color='r', alpha=0.5, label='True Value')\n\nax.set_xticks(indices)\nax.set_xticklabels(feature_names, rotation=90)\nax.set_title('Autoencoder reconstruction vs. input (test row 9)')\nax.set_ylabel('min-max scaled value')\nax.legend()\n\nplt.show()", "execution_count": 20, "outputs": [ { 
"output_type": "display_data", "data": { "text/plain": "
", "image/png": "\n" }, "metadata": { "needs_background": "light" } } ] }, { "metadata": { "_uuid": "ccddee14adf117aeb4a91469cacf9f53b03797c6", "_cell_guid": "614bafe2-a513-4c53-bde1-22f6133aba25", "trusted": true }, "cell_type": "code", "source": "encoder = Model(data_in,encoded)", "execution_count": 21, "outputs": [] }, { "metadata": { "_uuid": "12e7b1e2d790c693b59a0df2091eeb8ddd631580", "_cell_guid": "06697d10-31b4-462d-8031-fa13c9731f02", "trusted": true }, "cell_type": "code", "source": "enc = encoder.predict(X_test)", "execution_count": 22, "outputs": [] }, { "metadata": { "_uuid": "7ed4bf4664bf01f018920903c01d725b46a0cd77", "_cell_guid": "a552780b-8083-473c-869c-9279e6bdd29a", "trusted": true }, "cell_type": "code", "source": "np.savez('enc.npz',enc,y_test)", "execution_count": 23, "outputs": [] }, { "metadata": { "_uuid": "203d4e60679b665c89bbb60dbf86d88a8a87e9fb", "_cell_guid": "f2233170-0d61-4a1d-a755-ac445610b327", "trusted": true }, "cell_type": "code", "source": "#from sklearn.manifold import TSNE", "execution_count": 24, "outputs": [] }, { "metadata": { "_uuid": "8c020a95be24b1365a1f095c5dbe40c134d33cee", "_cell_guid": "7243c543-d089-42da-9f1f-305c46d1c0cb", "trusted": true }, "cell_type": "code", "source": "#tsne = TSNE(verbose=1,n_iter=300)", "execution_count": 25, "outputs": [] }, { "metadata": { "_uuid": "402b7a2d0170d0254b2aab5d47101cd74eb9a40a", "_cell_guid": "508b4381-9416-4c29-ad5d-2a52271cda1d", "trusted": true }, "cell_type": "code", "source": "#res = tsne.fit_transform(enc)", "execution_count": 26, "outputs": [] }, { "metadata": { "_uuid": "1cf56c49f26a3690464716d14d92c41b97429468", "_cell_guid": "854ed854-13ac-4f07-8ffb-b55eaac3b648", "trusted": true }, "cell_type": "code", "source": "'''\nfig = plt.figure(figsize=(10,7))\nscatter =plt.scatter(res[:,0],res[:,1],c=y_test,cmap='coolwarm', s=0.6)\nscatter.axes.get_xaxis().set_visible(False)\nscatter.axes.get_yaxis().set_visible(False)\n'''", "execution_count": 27, "outputs": [ { 
"output_type": "execute_result", "execution_count": 27, "data": { "text/plain": "\"\\nfig = plt.figure(figsize=(10,7))\\nscatter =plt.scatter(res[:,0],res[:,1],c=y_test,cmap='coolwarm', s=0.6)\\nscatter.axes.get_xaxis().set_visible(False)\\nscatter.axes.get_yaxis().set_visible(False)\\n\"" }, "metadata": {} } ] }, { "metadata": { "_uuid": "0bba9d69e6b236cb7b82945564a2864aea2bcfec", "_cell_guid": "1ca6b355-af5b-4b8d-a794-8c4a47718982", "trusted": true }, "cell_type": "code", "source": "", "execution_count": null, "outputs": [] }, { "metadata": { "_uuid": "93d11a6866ecbd9df3a1b25ab8d4281d47543ee3", "collapsed": true, "_cell_guid": "3dc58898-2f57-4feb-a7c3-383d433dafb9" }, "cell_type": "markdown", "source": "# VAE" }, { "metadata": { "_uuid": "9a0e3bae957fba4decfd667d04de16d236ad955a", "_cell_guid": "c93f3256-8050-4cf0-9673-d25a089918dc", "trusted": true }, "cell_type": "code", "source": "import numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy.stats import norm\n\nfrom keras.layers import Input, Dense, Lambda\nfrom keras.models import Model\nfrom keras import backend as K\nfrom keras import metrics", "execution_count": 28, "outputs": [] }, { "metadata": { "_uuid": "8fc46fe9924a4e258885f487778f71d2e15c87d1", "_cell_guid": "cca61b79-7b71-49a6-8ca6-a2864ea2639d", "trusted": true }, "cell_type": "code", "source": "batch_size = 100\noriginal_dim = 29\nlatent_dim = 6\nintermediate_dim = 16\nepochs = 50\nepsilon_std = 1.0\n", "execution_count": 29, "outputs": [] }, { "metadata": { "_uuid": "63b7c72aa8aee78c214208905654a2c9e28bf8a1", "_cell_guid": "a70c9072-fec2-4200-8247-98efb58dd616", "trusted": true }, "cell_type": "code", "source": "x = Input(shape=(original_dim,))\nh = Dense(intermediate_dim, activation='relu')(x)\nz_mean = Dense(latent_dim)(h)\nz_log_var = Dense(latent_dim)(h)", "execution_count": 30, "outputs": [] }, { "metadata": { "_uuid": "44fe9903864bc635783ad7e44d4263afeef9af66", "_cell_guid": "e71f7eae-dd80-4e1c-a282-51b047627388", "trusted": true }, 
"cell_type": "code", "source": "def sampling(args):\n z_mean, z_log_var = args\n epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0.,\n stddev=epsilon_std)\n return z_mean + K.exp(z_log_var / 2) * epsilon", "execution_count": 31, "outputs": [] }, { "metadata": { "_uuid": "d193ae8dd47ee5e363eaa3856205139aff28aebe", "_cell_guid": "7f7049ea-7db6-4ef1-8426-67783f8e6d8b", "trusted": true }, "cell_type": "code", "source": "# note that \"output_shape\" isn't necessary with the TensorFlow backend\nz = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])", "execution_count": 32, "outputs": [] }, { "metadata": { "_uuid": "0a7aba4af42824764e8f4af790a3b9baf479018e", "_cell_guid": "25a037cf-7a53-455a-8ca9-7e5549f5dc92", "trusted": true }, "cell_type": "code", "source": "# we instantiate these layers separately so as to reuse them later\ndecoder_h = Dense(intermediate_dim, activation='relu')\nh_decoded = decoder_h(z)\n\ndecoder_mean = Dense(original_dim)\nx_decoded_mean = decoder_mean(h_decoded)", "execution_count": 33, "outputs": [] }, { "metadata": { "_uuid": "3cf3e7a6857c7ec10871b732527a0511d9b22820", "_cell_guid": "c4f6d58e-0a4c-4af4-bd90-2474b4cd27ba", "trusted": true }, "cell_type": "code", "source": "# instantiate VAE model\nvae = Model(x, x_decoded_mean)", "execution_count": 34, "outputs": [] }, { "metadata": { "_uuid": "150dd9e4dbbfa8e5abb96bcbfce87deefb57fc9e", "_cell_guid": "5fe0a9ac-c8c5-4566-ba6b-b8263cee0c01", "trusted": true }, "cell_type": "code", "source": "# Compute VAE loss\nxent_loss = original_dim * metrics.mean_squared_error(x, x_decoded_mean)\nkl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)\nvae_loss = K.mean(xent_loss + kl_loss)", "execution_count": 35, "outputs": [] }, { "metadata": { "_uuid": "4260d4d8134471d12d2647ad2c83f9066745cc23", "_cell_guid": "2be1ce66-b16a-4402-99e4-8baa9833f627", "trusted": true }, "cell_type": "code", "source": "vae.add_loss(vae_loss)", "execution_count": 
36, "outputs": [] },
{ "metadata": { "_uuid": "019df269c0a73f40b16522d518074f42974348ad", "_cell_guid": "a9e73316-5640-4ec8-9fa4-a1cbe7c2f5a2", "trusted": true }, "cell_type": "code", "source": "from keras.optimizers import RMSprop\n\n# NOTE(review): lr=0.1 is two orders of magnitude above Keras' RMSprop default\n# (0.001). The recorded log below sits at a training loss of about 0.1326 from\n# epoch 2 onwards while val_loss jumps around, so the step size is worth revisiting.\nrmsprop = RMSprop(lr=0.1)\n# The loss was attached with add_loss, so compile() takes no loss argument.\nvae.compile(optimizer=rmsprop)\n#vae.summary()\n", "execution_count": 37, "outputs": [] },
{ "metadata": { "_uuid": "753463dcc822c8aee2d2070ba542d436c7fe9233", "_cell_guid": "017c9234-2a07-4ff0-8659-107f036689ee", "trusted": true }, "cell_type": "code", "source": "# No targets are passed: the objective was attached to the model via add_loss.\n# NOTE(review): the batch_size hyperparameter defined earlier (100) is not used here.\nvae.fit(X_train,\n        epochs=epochs,\n        batch_size=256,\n        shuffle=True,\n        validation_data=(X_test, None))", "execution_count": 38, "outputs": [ { "output_type": "stream", "text": "Train on 256326 samples, validate on 28481 samples\nEpoch 1/50\n256326/256326 [==============================] - 5s 18us/step - loss: 3.5317 - val_loss: 0.1338\nEpoch 2/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1327 - val_loss: 0.1285\nEpoch 3/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1293\nEpoch 4/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1325\nEpoch 5/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1327 - val_loss: 0.1200\nEpoch 6/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1328 - val_loss: 0.1358\nEpoch 7/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1296\nEpoch 8/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1369\nEpoch 9/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1307\nEpoch 10/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1308\nEpoch 11/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1325 - val_loss: 0.1398\nEpoch 12/50\n256326/256326 [==============================] - 4s 
17us/step - loss: 0.1325 - val_loss: 0.1469\nEpoch 13/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1284\nEpoch 14/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1342\nEpoch 15/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1300\nEpoch 16/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1327 - val_loss: 0.1238\nEpoch 17/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1397\nEpoch 18/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1162\nEpoch 19/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1380\nEpoch 20/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1327 - val_loss: 0.1237\nEpoch 21/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1274\nEpoch 22/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1254\nEpoch 23/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1327 - val_loss: 0.1326\nEpoch 24/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1247\nEpoch 25/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1278\nEpoch 26/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1309\nEpoch 27/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1325 - val_loss: 0.1331\nEpoch 28/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1325 - val_loss: 0.1457\nEpoch 29/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1336\nEpoch 30/50\n256326/256326 [==============================] - 4s 17us/step - loss: 
0.1326 - val_loss: 0.1360\nEpoch 31/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1325 - val_loss: 0.1319\nEpoch 32/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1312\nEpoch 33/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1307\nEpoch 34/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1314\nEpoch 35/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1327 - val_loss: 0.1327\nEpoch 36/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1435\nEpoch 37/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1327 - val_loss: 0.1419\nEpoch 38/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1360\nEpoch 39/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1325 - val_loss: 0.1252\nEpoch 40/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1305\nEpoch 41/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1333\nEpoch 42/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1245\nEpoch 43/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1327 - val_loss: 0.1414\nEpoch 44/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1474\nEpoch 45/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1366\nEpoch 46/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1247\nEpoch 47/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1326 - val_loss: 0.1233\nEpoch 48/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1325 - val_loss: 
0.1406\nEpoch 49/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1327 - val_loss: 0.1303\nEpoch 50/50\n256326/256326 [==============================] - 4s 17us/step - loss: 0.1325 - val_loss: 0.1320\n", "name": "stdout" }, { "output_type": "execute_result", "execution_count": 38, "data": { "text/plain": "" }, "metadata": {} } ] },
{ "metadata": { "_uuid": "11a6cd0780e4b3fbe8c6dd1f88a8792b29100eb6", "_cell_guid": "73b34254-1cb3-4e1a-9358-24418adf3d67", "trusted": true }, "cell_type": "code", "source": "", "execution_count": null, "outputs": [] },
{ "metadata": { "_uuid": "abed5daec2649652cd3c04dcc6b845b456c06ea5", "_cell_guid": "17031c3a-0715-471c-9875-cbf904e380fb", "trusted": true }, "cell_type": "code", "source": "# Reconstructions from the VAE (not the plain autoencoder) for the plot below.\n# vae runs through the sampling layer, so repeated calls are not identical.\npred = vae.predict(X_test[0:10])", "execution_count": 39, "outputs": [] },
{ "metadata": { "_uuid": "3bc16f512f29f8768e795448b9d083395cc6bd7f", "_cell_guid": "c26b35dd-4a1f-439b-92e9-de596b9cbbe3", "trusted": true }, "cell_type": "code", "source": "import matplotlib.pyplot as plt\nimport numpy as np\n\nwidth = 0.8\n\nprediction = pred[1]\ntrue_value = X_test[1]\n\nindices = np.arange(len(prediction))\n\n# One label per feature (not per predicted row): V1..V28, then Amount.\nfeature_names = ['V{}'.format(i) for i in range(1, 29)] + ['Amount']\nassert len(feature_names) == len(prediction), 'one tick label per feature'\n\nfig, ax = plt.subplots(figsize=(10,7))\n\n# Wide blue bar = reconstruction, narrow red bar = input. Both are centred on\n# the same x so they overlap on the tick that names the feature.\nax.bar(indices, prediction, width=width, align='center',\n       color='b', label='Predicted Value')\nax.bar(indices, true_value, width=0.5*width, align='center',\n       color='r', alpha=0.5, label='True Value')\n\nax.set_xticks(indices)\nax.set_xticklabels(feature_names, rotation=90)\nax.set_title('VAE reconstruction vs. input (test row 1)')\nax.set_ylabel('min-max scaled value')\nax.legend()\n\nplt.show()", "execution_count": 41, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "
", "image/png": "\n" }, "metadata": { "needs_background": "light" } } ] }, { "metadata": { "_uuid": "0e0f9516866bcb4ea175b7aea3f15f7871bba92e", "_cell_guid": "e6ddec77-5941-4dab-b3c9-9a123db25d8b", "trusted": true }, "cell_type": "code", "source": "frauds = np.where(y_train == 1)", "execution_count": 42, "outputs": [] }, { "metadata": { "_uuid": "87a88eafdad6d84d56b1b0c4a06811895713fe18", "_cell_guid": "2aadc7c3-b2f8-4bc7-906d-d6f561a5cbd4", "trusted": true }, "cell_type": "code", "source": "encoder = Model(x,z_mean)", "execution_count": 43, "outputs": [] }, { "metadata": { "_uuid": "5937ee1cb583fe018eafb2890d5689ca0d65476f", "_cell_guid": "e51763f0-89ab-4b84-ba2d-357f7284c3e8", "trusted": true }, "cell_type": "code", "source": "fraud_encodings = encoder.predict(X_train[frauds],batch_size=128)", "execution_count": 44, "outputs": [] }, { "metadata": { "_uuid": "5e1f46e94560b0a716ea9bf95118bf6e722abe54", "_cell_guid": "da8feb09-3d32-49ca-9932-dae8a8283fdd", "trusted": true }, "cell_type": "code", "source": "fraud_encodings.shape", "execution_count": 45, "outputs": [ { "output_type": "execute_result", "execution_count": 45, "data": { "text/plain": "(443, 6)" }, "metadata": {} } ] }, { "metadata": { "_uuid": "bc0d45d78fad4fcae0337b234f603c42dc687e06", "_cell_guid": "dc3fdc07-ca70-4ae9-a2a3-a3d09e7547c3", "trusted": true }, "cell_type": "code", "source": "decoder_input = Input(shape=(latent_dim,))\n_h_decoded = decoder_h(decoder_input)\n_x_decoded_mean = decoder_mean(_h_decoded)\ngenerator = Model(decoder_input, _x_decoded_mean)", "execution_count": 46, "outputs": [] }, { "metadata": { "_uuid": "5c57ff5566273bf7b234182dd6481f7a25dcde5a", "_cell_guid": "b973bf00-313e-4473-8a26-2d7fa55d2e01", "trusted": true }, "cell_type": "code", "source": "more_frauds = generator.predict(fraud_encodings)", "execution_count": 47, "outputs": [] }, { "metadata": { "trusted": true, "_uuid": "5c6466eaaf38d0bf60d01d4fea34c7f535876e08" }, "cell_type": "code", "source": "", "execution_count": 
null, "outputs": [] } ], "metadata": { "language_info": { "name": "python", "version": "3.6.6", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 1 }