# %%
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

# %%
# Seed NumPy and TensorFlow up front so a fresh "Run All" is repeatable.
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)

# %%
df = pd.read_csv('../input/creditcard.csv')

# %%
df.head()

# %%
df.describe()

# %%
df.isnull().sum()

# %%
# 'Time' is not used as a feature. errors='ignore' keeps this cell re-runnable:
# without it a second execution raises KeyError because the column is gone.
df = df.drop('Time', axis=1, errors='ignore')

# %%
# Feature matrix (every column except the target) and the target vector.
X = df.drop('Class', axis=1).values
y = df['Class'].values

# %%
X.shape

# %%
from sklearn.model_selection import train_test_split

# Split BEFORE scaling so the scaling statistics below never see test rows.
# stratify=y keeps the class ratio identical in both splits (the notebook
# undersamples later, so the positive class is presumably rare -- without
# stratification a 10% test split could end up with very few positives).
# random_state pins the split even if the global NumPy RNG was advanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=1)

# %%
# Per-feature min-max scaling, fitted on the training split only and then
# applied unchanged to the test split and to the full matrix.
feature_min = X_train.min(axis=0)
feature_range = X_train.max(axis=0) - feature_min
# A constant training column has range 0; divide by 1 instead so it maps to 0
# rather than producing NaN/inf.
feature_range[feature_range == 0] = 1.0

X_train = (X_train - feature_min) / feature_range
X_test = (X_test - feature_min) / feature_range   # may fall slightly outside [0, 1]
X = (X - feature_min) / feature_range             # full matrix, same transform

# %%
X.mean()

# %%
X.shape

# %%
import os
os.environ["KERAS_BACKEND"] = "tensorflow"
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

from keras.layers import Input, Embedding, multiply, BatchNormalization
from keras.models import Model, Sequential
from keras.layers.core import Reshape, Dense, Dropout, Flatten
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D, UpSampling2D
from keras.datasets import mnist
from keras.optimizers import Adam
from keras import backend as K
from keras import initializers
from keras.utils import to_categorical

# Image-ordering flag; the models in this notebook only use Dense layers.
K.set_image_dim_ordering('th')
# %%
# Deterministic output.
# Tired of seeing the same results every time? Remove the line below.
np.random.seed(1000)

# Size of the noise vector fed to the generator. The notebook trains with 10.
LATENT_DIM = 10
# Number of input features, read from the data instead of hard-coding 29.
DATA_DIM = X_train.shape[1]
# Real classes in the target; the discriminator adds one extra "fake" class.
NUM_CLASSES = 2
# Legacy name kept for compatibility. It used to be 100 but was never used;
# it now mirrors the latent size that is actually in effect.
randomDim = LATENT_DIM


# %%
def build_generator(latent_dim, data_dim, output_activation='tanh'):
    """Build the generator: noise vector -> synthetic feature row.

    Architecture: Dense(16) -> LeakyReLU -> BatchNorm -> Dense(32) ->
    LeakyReLU -> BatchNorm -> Dense(data_dim, output_activation).
    Prints the layer summary as a side effect.

    Args:
        latent_dim: length of the input noise vector.
        data_dim: number of features in one generated sample.
        output_activation: activation of the last layer. Defaults to 'tanh'
            (outputs in [-1, 1]); pass 'sigmoid' when the real data is
            scaled to [0, 1] so generated rows live in the same range.

    Returns:
        A Keras ``Model`` mapping a ``(latent_dim,)`` input to a
        ``(data_dim,)`` output.
    """
    model = Sequential()

    model.add(Dense(16, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))

    # Only the first layer needs input_dim; later layers infer their input
    # size from the previous layer (here 16, not latent_dim).
    model.add(Dense(32))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))

    model.add(Dense(data_dim, activation=output_activation))

    model.summary()

    noise = Input(shape=(latent_dim,))
    generated = model(noise)

    return Model(noise, generated)


# %%
# The real features are min-max scaled to [0, 1] earlier in the notebook, so
# the generator uses a sigmoid output; with tanh it could emit negative values
# that never occur in real rows, making fakes trivially separable.
generator = build_generator(latent_dim=LATENT_DIM, data_dim=DATA_DIM,
                            output_activation='sigmoid')


# %%
def build_discriminator(data_dim, num_classes):
    """Build the two-headed discriminator.

    A shared trunk (Dense(31) -> LeakyReLU -> BatchNorm -> Dropout(0.25) ->
    Dense(16) -> LeakyReLU) feeds two output heads. Prints the trunk summary
    as a side effect.

    Args:
        data_dim: number of features in one input sample.
        num_classes: number of real classes; the label head has
            ``num_classes + 1`` units, the extra one being used for
            generated samples.

    Returns:
        A Keras ``Model`` with outputs ``[valid, label]``: ``valid`` is a
        single sigmoid unit, ``label`` a softmax over ``num_classes + 1``.
    """
    model = Sequential()
    model.add(Dense(31, input_dim=data_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dropout(0.25))
    # No input_dim here: this layer's input is the 31-unit layer above.
    model.add(Dense(16))
    model.add(LeakyReLU(alpha=0.2))

    model.summary()

    sample = Input(shape=(data_dim,))
    features = model(sample)
    valid = Dense(1, activation="sigmoid")(features)
    label = Dense(num_classes + 1, activation="softmax")(features)
    return Model(sample, [valid, label])


# %%
discriminator = build_discriminator(data_dim=DATA_DIM, num_classes=NUM_CLASSES)

# %%
optimizer = Adam(0.0002, 0.5)
# One loss per head, weighted equally.
discriminator.compile(loss=['binary_crossentropy', 'categorical_crossentropy'],
    loss_weights=[0.5, 0.5],
    optimizer=optimizer,
    metrics=['accuracy'])

# %%
# Stacked model used to train the generator: noise -> generator ->
# discriminator validity head. The discriminator is flagged non-trainable
# AFTER it was compiled above and BEFORE `combined` is compiled.
noise = Input(shape=(LATENT_DIM,))
img = generator(noise)
discriminator.trainable = False
valid, _ = discriminator(img)
combined = Model(noise, valid)
combined.compile(loss=['binary_crossentropy'],
    optimizer=optimizer)

# %%
X_train.shape

# %%
from imblearn.under_sampling import RandomUnderSampler

# %%
rus = RandomUnderSampler(random_state=42)
# %%
# Undersample the TRAINING split only. Resampling the full (X, y) would put
# rows of the held-out test split into the training set and inflate the F1
# reported during training.
# NOTE(review): `fit_sample` is the older imbalanced-learn name; newer
# releases expose it as `fit_resample` -- switch if the environment is upgraded.
X_res, y_res = rus.fit_sample(X_train, y_train)

# %%
X_res.shape

# %%
# The features are already min-max scaled before the train/test split earlier
# in the notebook. Re-scaling X_res and X_test here, each by its own global
# min/max, gave train and test different transforms, so no further scaling is
# applied.

# %%
# Balanced version of the test split. It is not used by the training call
# below, which evaluates on the full X_test / y_test.
X_test_res, y_test_res = rus.fit_sample(X_test, y_test)

# %%
from sklearn.metrics import accuracy_score, f1_score

# %%
y_res.shape


# %%
def train(X_train, y_train,
          X_test, y_test,
          generator, discriminator,
          combined,
          num_classes,
          epochs,
          batch_size=128,
          eval_every=10,
          latent_dim=None):
    """Train the semi-supervised GAN and track test F1 over time.

    Each "epoch" is a single iteration: one discriminator update on a half
    batch of real rows, one on a half batch of generated rows, then one
    generator update through ``combined`` on a full batch of noise.

    Args:
        X_train, y_train: training features and integer class labels
            (indexed with a random integer array every iteration).
        X_test, y_test: evaluation features and labels for the F1 score.
        generator: model mapping noise to synthetic rows (``predict``).
        discriminator: two-output model ``[valid, label]`` (``train_on_batch``
            and ``predict``).
        combined: generator + frozen discriminator validity head.
        num_classes: number of real classes; index ``num_classes`` is the
            extra "fake" class of the label head.
        epochs: number of training iterations.
        batch_size: generator batch size; the discriminator sees
            ``batch_size // 2`` real and as many fake rows per iteration.
        eval_every: compute F1 on the test data every this many iterations.
        latent_dim: length of the noise vector. When ``None`` it is read
            from ``generator.input_shape[-1]``.

    Returns:
        List of F1 scores, one per evaluation, in chronological order.

    Raises:
        ValueError: if ``batch_size`` is smaller than 2 (empty half batch).
    """
    half_batch = batch_size // 2
    if half_batch < 1:
        raise ValueError("batch_size must be at least 2")

    # Take the noise size from the generator instead of hard-coding it.
    if latent_dim is None:
        latent_dim = generator.input_shape[-1]

    # Class weights:
    # 50% of the rows the discriminator trains on are 'fake'.
    # Weight = 1 / frequency
    cw1 = {0: 1, 1: 1}
    cw2 = {i: num_classes / half_batch for i in range(num_classes)}
    cw2[num_classes] = 1 / half_batch

    # Targets that never change between iterations.
    valid = np.ones((half_batch, 1))
    fake = np.zeros((half_batch, 1))
    fake_labels = to_categorical(np.full((half_batch, 1), num_classes),
                                 num_classes=num_classes + 1)
    validity = np.ones((batch_size, 1))

    f1_progress = []

    for epoch in range(epochs):

        # ---------------------
        #  Train Discriminator
        # ---------------------

        # Random half batch of real rows (sampled with replacement).
        idx = np.random.randint(0, X_train.shape[0], half_batch)
        imgs = X_train[idx]
        labels = to_categorical(y_train[idx], num_classes=num_classes + 1)

        # Half batch of generated rows.
        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        gen_imgs = generator.predict(noise)

        d_loss_real = discriminator.train_on_batch(imgs, [valid, labels], class_weight=[cw1, cw2])
        d_loss_fake = discriminator.train_on_batch(gen_imgs, [fake, fake_labels], class_weight=[cw1, cw2])
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # ---------------------
        #  Train Generator
        # ---------------------

        noise = np.random.normal(0, 1, (batch_size, latent_dim))

        # `combined` has a single (validity) output, so the two-entry
        # class_weight list used for the discriminator does not apply here.
        g_loss = combined.train_on_batch(noise, validity)

        # d_loss entries 3 and 4 are read as the accuracies of the validity
        # and label heads (discriminator compiled with metrics=['accuracy']).
        print ("%d [D loss: %f, acc: %.2f%%, op_acc: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[3], 100*d_loss[4], g_loss))

        if epoch % eval_every == 0:
            _, y_pred = discriminator.predict(X_test, batch_size=batch_size)
            # Drop the last ("fake") column and pick the most likely real class.
            y_pred = np.argmax(y_pred[:, :-1], axis=1)

            f1 = f1_score(y_test, y_pred)
            print('Epoch: {}, F1: {:.5f}, F1P: {}'.format(epoch, f1, len(f1_progress)))
            f1_progress.append(f1)

    return f1_progress


# %%
f1_p = train(X_res, y_res,
             X_test, y_test,
             generator, discriminator,
             combined,
             num_classes=2,
             epochs=5000,
             batch_size=128)

# %%
# One point per evaluation, i.e. every 10 training iterations.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(f1_p)
ax.set_xlabel('10 Epochs')
ax.set_ylabel('F1 Score Validation')