{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pyarrow.parquet as pq\n", "import numpy as np\n", "import pandas as pd\n", "from tensorflow.keras.layers import *\n", "from tensorflow.keras.layers import Concatenate\n", "from tensorflow.keras.models import Sequential, Model\n", "from tensorflow.keras.optimizers import Adam\n", "from tensorflow.keras.utils import to_categorical\n", "from tensorflow.keras.metrics import AUC\n", "import tensorflow as tf\n", "import warnings\n", "warnings.filterwarnings('ignore')\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "file = 'QCDToGGQQ_IMGjet_RH1all_jet0_run0_n36272.test.snappy.parquet'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "data = pq.read_table(file)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "data = data.to_pandas()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
X_jetsptm0y
0[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...112.41109521.0982480.0
1[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...95.22040614.0306001.0
2[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...97.00731717.7289681.0
3[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...82.49031114.7027410.0
4[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.328483...102.53923819.4562570.0
\n", "
" ], "text/plain": [ " X_jets pt m0 \\\n", "0 [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 112.411095 21.098248 \n", "1 [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 95.220406 14.030600 \n", "2 [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 97.007317 17.728968 \n", "3 [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 82.490311 14.702741 \n", "4 [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.328483... 102.539238 19.456257 \n", "\n", " y \n", "0 0.0 \n", "1 1.0 \n", "2 1.0 \n", "3 0.0 \n", "4 0.0 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head()" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "70.3982162475586" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data['pt'].min()" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Scalar per-jet features and the raw 0/1 class labels.\n", "pt = data['pt'].to_numpy()\n", "m0 = data['m0'].to_numpy()\n", "labels = data['y'].to_numpy()" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "X_jets = data['X_jets'].to_numpy()" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Dense channels-first image tensor: (n_jets, 3, 125, 125).\n", "# The sample count is taken from the data instead of being hard-coded.\n", "# float32 is Keras's default input dtype and halves the memory needed vs. float64.\n", "n_jets = len(X_jets)\n", "X = np.zeros((n_jets, 3, 125, 125), dtype=np.float32)" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Every entry of X_jets is a nested 3 x 125 x 125 array-of-arrays. Stack one jet at a\n", "# time with NumPy rather than assigning ~1.7e9 scalars one by one in pure Python.\n", "for i, jet in enumerate(X_jets):\n", "    X[i] = np.stack([np.stack(channel) for channel in jet])" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Release the DataFrame and the nested object array; only the dense copies are needed.\n", "del data\n", "del X_jets" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# One-hot encode from `labels` (not from `y` itself) so re-running this cell is safe.\n", "y = to_categorical(labels, num_classes=2)" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(36272,)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "pt.shape" ] },
{ "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# Two-class jet classifier: a small CNN over the 3-channel 125x125 jet image,\n", "# concatenated with two scalar inputs (fed with pt and m0 at fit time).\n", "image_in = Input(shape=(3, 125, 125))\n", "pt_in = Input(shape=(1,))\n", "m0_in = Input(shape=(1,))\n", "\n", "# Image branch. The shape is taken from `image_in`, so Conv2D needs no `input_shape=`.\n", "image_feat = Conv2D(3, (5, 5), activation='relu', data_format='channels_first', padding='same')(image_in)\n", "image_feat = MaxPooling2D((5, 5), data_format='channels_first')(image_feat)\n", "image_feat = Flatten()(image_feat)\n", "image_feat = Dense(12, activation='relu')(image_feat)\n", "\n", "# Scalar branches: one small dense layer per scalar input.\n", "pt_feat = Dense(4, activation='relu')(pt_in)\n", "m0_feat = Dense(4, activation='relu')(m0_in)\n", "\n", "# Merge the three branches and classify with a 2-way softmax.\n", "merged = Concatenate(axis=1)([image_feat, pt_feat, m0_feat])\n", "output = Dense(2, activation='softmax')(merged)\n", "model = Model(inputs=[image_in, pt_in, m0_in], outputs=output)" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model\"\n", "__________________________________________________________________________________________________\n", " Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", " input_1 (InputLayer) [(None, 3, 125, 125 0 [] \n", " )] \n", " \n", " conv2d (Conv2D) (None, 3, 125, 125) 228 ['input_1[0][0]'] \n", " \n", " max_pooling2d (MaxPooling2D) (None, 3, 25, 25) 0 ['conv2d[0][0]'] \n", " \n", " flatten (Flatten) (None, 1875) 0 ['max_pooling2d[0][0]'] \n", " \n", " input_2 (InputLayer) [(None, 1)] 0 [] \n", " \n", " input_3 (InputLayer) [(None, 1)] 0 [] \n", " \n", " dense (Dense) (None, 12) 22512 ['flatten[0][0]'] \n", " \n", " dense_1 (Dense) (None, 4) 8 ['input_2[0][0]'] \n", " \n", " dense_2 (Dense) (None, 4) 8 ['input_3[0][0]'] \n", " \n", " concatenate (Concatenate) (None, 20) 0 ['dense[0][0]', \n", " 'dense_1[0][0]', \n", " 'dense_2[0][0]'] \n", " \n", " dense_3 (Dense) (None, 2) 42 ['concatenate[0][0]'] \n", " \n", 
"==================================================================================================\n", "Total params: 22,798\n", "Trainable params: 22,798\n", "Non-trainable params: 0\n", "__________________________________________________________________________________________________\n" ] }, { "data": { "image/png": "", "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Print the layer table, then render the graph of the three-input model.\n", "model.summary()\n", "tf.keras.utils.plot_model(model)" ] },
{ "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# One-hot targets with a softmax head -> categorical cross-entropy; AUC is the tracked metric.\n", "optimizer = Adam(learning_rate=0.0005)\n", "model.compile(optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[AUC()])" ] },
{ "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "1021/1021 [==============================] - 14s 14ms/step - loss: 6.1948 - auc: 0.5858 - val_loss: 2.0117 - val_auc: 0.6429\n", "Epoch 2/10\n", "1021/1021 [==============================] - 12s 12ms/step - loss: 1.3758 - auc: 0.6577 - val_loss: 0.8610 - val_auc: 0.6683\n", "Epoch 3/10\n", "1021/1021 [==============================] - 12s 12ms/step - loss: 0.7029 - auc: 0.7002 - val_loss: 0.6296 - val_auc: 0.7243\n", "Epoch 4/10\n", "1021/1021 [==============================] - 12s 12ms/step - loss: 0.6042 - auc: 0.7456 - val_loss: 0.6179 - val_auc: 0.7396\n", "Epoch 5/10\n", "1021/1021 [==============================] - 12s 12ms/step - loss: 0.5899 - auc: 0.7592 - val_loss: 0.5967 - val_auc: 0.7645\n", "Epoch 6/10\n", "1021/1021 [==============================] - 12s 12ms/step - loss: 0.5832 - auc: 0.7657 - val_loss: 0.5872 - val_auc: 0.7658\n", "Epoch 7/10\n", "1021/1021 [==============================] - 12s 12ms/step - loss: 0.5770 - auc: 0.7714 - val_loss: 0.5954 - val_auc: 0.7581\n", "Epoch 8/10\n", "1021/1021 [==============================] - 12s 12ms/step - loss: 0.5753 - auc: 0.7734 - val_loss: 0.5790 - val_auc: 0.7739\n", 
"Epoch 9/10\n", "1021/1021 [==============================] - 12s 12ms/step - loss: 0.5692 - auc: 0.7786 - val_loss: 0.5873 - val_auc: 0.7703\n", "Epoch 10/10\n", "1021/1021 [==============================] - 12s 12ms/step - loss: 0.5630 - auc: 0.7851 - val_loss: 0.5882 - val_auc: 0.7710\n" ] } ], "source": [ "# Inputs follow the order of the model's Input layers: jet image, pt, m0.\n", "# validation_split=0.1 holds out the final 10% of the samples for validation.\n", "with tf.device('/gpu:0'):\n", "    model.fit(\n", "        x=[X, pt, m0],\n", "        y=y,\n", "        batch_size=32,\n", "        epochs=10,\n", "        validation_split=0.1,\n", "    )" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Train AUC Score: 0.785\n", "\n", "Validation AUC Score: 0.771" ] } ], "metadata": { "interpreter": { "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" }, "kernelspec": { "display_name": "Python 3.8.10 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }