{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Expected Goals Model\n",
"\n",
"Load in pickled preprocessed data, separate into train and test split and train Logistic Regression"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:17:37.364283Z",
"start_time": "2021-01-03T11:17:34.480145Z"
}
},
"outputs": [],
"source": [
"import sys\n",
"import os\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import json\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import FCPython\n",
"\n",
"import pickle\n",
"import statsmodels.api as sm\n",
"import statsmodels.formula.api as smf\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"import Metrica_Functions_TLMAnalytics as mfun"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Preprocess StatsBomb Event Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load Events"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:18:25.439347Z",
"start_time": "2021-01-03T11:18:24.009193Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[16, 43, 11, 2]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get competition ids for relevant competitions\n",
"comps = pd.read_json('open-data-master/data/competitions.json')\n",
"male_comps = comps[comps['competition_gender'] == 'male']\n",
"male_comps_id = list(male_comps['competition_id'].unique())\n",
"male_comps_id"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:18:30.120116Z",
"start_time": "2021-01-03T11:18:29.265432Z"
}
},
"outputs": [],
"source": [
"# For those competitions, identify the match ids\n",
"male_df_list = []\n",
"for comp in male_comps_id:\n",
" json_files = [x for x in os.listdir('open-data-master/data/matches/' + str(comp)) if 'json' in x]\n",
" for event_json in json_files:\n",
" male_df = pd.read_json('open-data-master/data/matches/' + str(comp) + '/' + event_json)\n",
" male_df_list.append(male_df)\n",
"\n",
"male_df = pd.concat(male_df_list)\n",
"male_matches = list(male_df['match_id'].unique())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:21:42.967968Z",
"start_time": "2021-01-03T11:18:41.920269Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading: 15946. 1 / 596\n",
"Loading: 15956. 2 / 596\n",
"Loading: 15973. 3 / 596\n",
"Loading: 15978. 4 / 596\n",
"Loading: 15986. 5 / 596\n",
"Loading: 15998. 6 / 596\n",
"Loading: 16010. 7 / 596\n",
"Loading: 16023. 8 / 596\n",
"Loading: 16029. 9 / 596\n",
"Loading: 16056. 10 / 596\n",
"Loading: 16073. 11 / 596\n",
"Loading: 16079. 12 / 596\n",
"Loading: 16086. 13 / 596\n",
"Loading: 16095. 14 / 596\n",
"Loading: 16109. 15 / 596\n",
"Loading: 16120. 16 / 596\n",
"Loading: 16131. 17 / 596\n",
"Loading: 16136. 18 / 596\n",
"Loading: 16149. 19 / 596\n",
"Loading: 16157. 20 / 596\n",
"Loading: 16173. 21 / 596\n",
"Loading: 16182. 22 / 596\n",
"Loading: 16190. 23 / 596\n",
"Loading: 16196. 24 / 596\n",
"Loading: 16205. 25 / 596\n",
"Loading: 16215. 26 / 596\n",
"Loading: 16231. 27 / 596\n",
"Loading: 16240. 28 / 596\n",
"Loading: 16248. 29 / 596\n",
"Loading: 16265. 30 / 596\n",
"Loading: 16275. 31 / 596\n",
"Loading: 16289. 32 / 596\n",
"Loading: 16306. 33 / 596\n",
"Loading: 16317. 34 / 596\n",
"Loading: 18235. 35 / 596\n",
"Loading: 18236. 36 / 596\n",
"Loading: 18237. 37 / 596\n",
"Loading: 18240. 38 / 596\n",
"Loading: 18241. 39 / 596\n",
"Loading: 18242. 40 / 596\n",
"Loading: 18243. 41 / 596\n",
"Loading: 18244. 42 / 596\n",
"Loading: 18245. 43 / 596\n",
"Loading: 22912. 44 / 596\n",
"Loading: 2302764. 45 / 596\n",
"Loading: 265830. 46 / 596\n",
"Loading: 265835. 47 / 596\n",
"Loading: 265837. 48 / 596\n",
"Loading: 265839. 49 / 596\n",
"Loading: 265857. 50 / 596\n",
"Loading: 265866. 51 / 596\n",
"Loading: 265894. 52 / 596\n",
"Loading: 265896. 53 / 596\n",
"Loading: 265918. 54 / 596\n",
"Loading: 265944. 55 / 596\n",
"Loading: 265952. 56 / 596\n",
"Loading: 265958. 57 / 596\n",
"Loading: 265963. 58 / 596\n",
"Loading: 266015. 59 / 596\n",
"Loading: 266033. 60 / 596\n",
"Loading: 266045. 61 / 596\n",
"Loading: 266056. 62 / 596\n",
"Loading: 266066. 63 / 596\n",
"Loading: 266074. 64 / 596\n",
"Loading: 266106. 65 / 596\n",
"Loading: 266117. 66 / 596\n",
"Loading: 266142. 67 / 596\n",
"Loading: 266148. 68 / 596\n",
"Loading: 266149. 69 / 596\n",
"Loading: 266160. 70 / 596\n",
"Loading: 266166. 71 / 596\n",
"Loading: 266191. 72 / 596\n",
"Loading: 266201. 73 / 596\n",
"Loading: 266230. 74 / 596\n",
"Loading: 266236. 75 / 596\n",
"Loading: 266240. 76 / 596\n",
"Loading: 266254. 77 / 596\n",
"Loading: 266256. 78 / 596\n",
"Loading: 266273. 79 / 596\n",
"Loading: 266274. 80 / 596\n",
"Loading: 266280. 81 / 596\n",
"Loading: 266299. 82 / 596\n",
"Loading: 266310. 83 / 596\n",
"Loading: 266320. 84 / 596\n",
"Loading: 266357. 85 / 596\n",
"Loading: 266406. 86 / 596\n",
"Loading: 266420. 87 / 596\n",
"Loading: 266424. 88 / 596\n",
"Loading: 266433. 89 / 596\n",
"Loading: 266440. 90 / 596\n",
"Loading: 266462. 91 / 596\n",
"Loading: 266467. 92 / 596\n",
"Loading: 266477. 93 / 596\n",
"Loading: 266490. 94 / 596\n",
"Loading: 266491. 95 / 596\n",
"Loading: 266498. 96 / 596\n",
"Loading: 266516. 97 / 596\n",
"Loading: 266525. 98 / 596\n",
"Loading: 266528. 99 / 596\n",
"Loading: 266531. 100 / 596\n",
"Loading: 266557. 101 / 596\n",
"Loading: 266560. 102 / 596\n",
"Loading: 266603. 103 / 596\n",
"Loading: 266613. 104 / 596\n",
"Loading: 266620. 105 / 596\n",
"Loading: 266631. 106 / 596\n",
"Loading: 266653. 107 / 596\n",
"Loading: 266664. 108 / 596\n",
"Loading: 266669. 109 / 596\n",
"Loading: 266670. 110 / 596\n",
"Loading: 266724. 111 / 596\n",
"Loading: 266731. 112 / 596\n",
"Loading: 266741. 113 / 596\n",
"Loading: 266770. 114 / 596\n",
"Loading: 266794. 115 / 596\n",
"Loading: 266815. 116 / 596\n",
"Loading: 266827. 117 / 596\n",
"Loading: 266838. 118 / 596\n",
"Loading: 266846. 119 / 596\n",
"Loading: 266871. 120 / 596\n",
"Loading: 266874. 121 / 596\n",
"Loading: 266883. 122 / 596\n",
"Loading: 266885. 123 / 596\n",
"Loading: 266892. 124 / 596\n",
"Loading: 266916. 125 / 596\n",
"Loading: 266921. 126 / 596\n",
"Loading: 266929. 127 / 596\n",
"Loading: 266952. 128 / 596\n",
"Loading: 266961. 129 / 596\n",
"Loading: 266967. 130 / 596\n",
"Loading: 266986. 131 / 596\n",
"Loading: 266989. 132 / 596\n",
"Loading: 267039. 133 / 596\n",
"Loading: 267058. 134 / 596\n",
"Loading: 267076. 135 / 596\n",
"Loading: 267077. 136 / 596\n",
"Loading: 267085. 137 / 596\n",
"Loading: 267101. 138 / 596\n",
"Loading: 267138. 139 / 596\n",
"Loading: 267183. 140 / 596\n",
"Loading: 267192. 141 / 596\n",
"Loading: 267197. 142 / 596\n",
"Loading: 267212. 143 / 596\n",
"Loading: 267220. 144 / 596\n",
"Loading: 267273. 145 / 596\n",
"Loading: 267274. 146 / 596\n",
"Loading: 267301. 147 / 596\n",
"Loading: 267327. 148 / 596\n",
"Loading: 267343. 149 / 596\n",
"Loading: 267368. 150 / 596\n",
"Loading: 267373. 151 / 596\n",
"Loading: 267395. 152 / 596\n",
"Loading: 267400. 153 / 596\n",
"Loading: 267422. 154 / 596\n",
"Loading: 267432. 155 / 596\n",
"Loading: 267464. 156 / 596\n",
"Loading: 267492. 157 / 596\n",
"Loading: 267499. 158 / 596\n",
"Loading: 267502. 159 / 596\n",
"Loading: 267506. 160 / 596\n",
"Loading: 267520. 161 / 596\n",
"Loading: 267533. 162 / 596\n",
"Loading: 267561. 163 / 596\n",
"Loading: 267567. 164 / 596\n",
"Loading: 267569. 165 / 596\n",
"Loading: 267576. 166 / 596\n",
"Loading: 267590. 167 / 596\n",
"Loading: 267596. 168 / 596\n",
"Loading: 267597. 169 / 596\n",
"Loading: 267611. 170 / 596\n",
"Loading: 267660. 171 / 596\n",
"Loading: 267670. 172 / 596\n",
"Loading: 267675. 173 / 596\n",
"Loading: 303377. 174 / 596\n",
"Loading: 303400. 175 / 596\n",
"Loading: 303421. 176 / 596\n",
"Loading: 303430. 177 / 596\n",
"Loading: 303451. 178 / 596\n",
"Loading: 303470. 179 / 596\n",
"Loading: 303473. 180 / 596\n",
"Loading: 303479. 181 / 596\n",
"Loading: 303487. 182 / 596\n",
"Loading: 303493. 183 / 596\n",
"Loading: 303504. 184 / 596\n",
"Loading: 303516. 185 / 596\n",
"Loading: 303517. 186 / 596\n",
"Loading: 303524. 187 / 596\n",
"Loading: 303532. 188 / 596\n",
"Loading: 303548. 189 / 596\n",
"Loading: 303596. 190 / 596\n",
"Loading: 303600. 191 / 596\n",
"Loading: 303610. 192 / 596\n",
"Loading: 303615. 193 / 596\n",
"Loading: 303634. 194 / 596\n",
"Loading: 303652. 195 / 596\n",
"Loading: 303664. 196 / 596\n",
"Loading: 303666. 197 / 596\n",
"Loading: 303674. 198 / 596\n",
"Loading: 303680. 199 / 596\n",
"Loading: 303682. 200 / 596\n",
"Loading: 303696. 201 / 596\n",
"Loading: 303700. 202 / 596\n",
"Loading: 303707. 203 / 596\n",
"Loading: 303715. 204 / 596\n",
"Loading: 303725. 205 / 596\n",
"Loading: 303731. 206 / 596\n",
"Loading: 3749052. 207 / 596\n",
"Loading: 3749068. 208 / 596\n",
"Loading: 3749079. 209 / 596\n",
"Loading: 3749133. 210 / 596\n",
"Loading: 3749153. 211 / 596\n",
"Loading: 3749192. 212 / 596\n",
"Loading: 3749196. 213 / 596\n",
"Loading: 3749233. 214 / 596\n",
"Loading: 3749246. 215 / 596\n",
"Loading: 3749253. 216 / 596\n",
"Loading: 3749257. 217 / 596\n",
"Loading: 3749276. 218 / 596\n",
"Loading: 3749278. 219 / 596\n",
"Loading: 3749296. 220 / 596\n",
"Loading: 3749310. 221 / 596\n",
"Loading: 3749346. 222 / 596\n",
"Loading: 3749358. 223 / 596\n",
"Loading: 3749360. 224 / 596\n",
"Loading: 3749403. 225 / 596\n",
"Loading: 3749431. 226 / 596\n",
"Loading: 3749434. 227 / 596\n",
"Loading: 3749448. 228 / 596\n",
"Loading: 3749453. 229 / 596\n",
"Loading: 3749454. 230 / 596\n",
"Loading: 3749462. 231 / 596\n",
"Loading: 3749465. 232 / 596\n",
"Loading: 3749493. 233 / 596\n",
"Loading: 3749522. 234 / 596\n",
"Loading: 3749526. 235 / 596\n",
"Loading: 3749528. 236 / 596\n",
"Loading: 3749552. 237 / 596\n",
"Loading: 3749603. 238 / 596\n",
"Loading: 3749642. 239 / 596\n",
"Loading: 3750200. 240 / 596\n",
"Loading: 3750201. 241 / 596\n",
"Loading: 3752619. 242 / 596\n",
"Loading: 68313. 243 / 596\n",
"Loading: 68314. 244 / 596\n",
"Loading: 68315. 245 / 596\n",
"Loading: 68316. 246 / 596\n",
"Loading: 68317. 247 / 596\n",
"Loading: 68318. 248 / 596\n",
"Loading: 68319. 249 / 596\n",
"Loading: 68320. 250 / 596\n",
"Loading: 68321. 251 / 596\n",
"Loading: 68322. 252 / 596\n",
"Loading: 68323. 253 / 596\n",
"Loading: 68324. 254 / 596\n",
"Loading: 68325. 255 / 596\n",
"Loading: 68326. 256 / 596\n",
"Loading: 68327. 257 / 596\n",
"Loading: 68328. 258 / 596\n",
"Loading: 68329. 259 / 596\n",
"Loading: 68330. 260 / 596\n",
"Loading: 68331. 261 / 596\n",
"Loading: 68332. 262 / 596\n",
"Loading: 68333. 263 / 596\n",
"Loading: 68334. 264 / 596\n",
"Loading: 68335. 265 / 596\n",
"Loading: 68336. 266 / 596\n",
"Loading: 68339. 267 / 596\n",
"Loading: 68340. 268 / 596\n",
"Loading: 68341. 269 / 596\n",
"Loading: 68342. 270 / 596\n",
"Loading: 68347. 271 / 596\n",
"Loading: 68348. 272 / 596\n",
"Loading: 68350. 273 / 596\n",
"Loading: 68351. 274 / 596\n",
"Loading: 68352. 275 / 596\n",
"Loading: 68353. 276 / 596\n",
"Loading: 68354. 277 / 596\n",
"Loading: 68356. 278 / 596\n",
"Loading: 68358. 279 / 596\n",
"Loading: 68359. 280 / 596\n",
"Loading: 68360. 281 / 596\n",
"Loading: 68361. 282 / 596\n",
"Loading: 68363. 283 / 596\n",
"Loading: 68364. 284 / 596\n",
"Loading: 68365. 285 / 596\n",
"Loading: 68366. 286 / 596\n",
"Loading: 69138. 287 / 596\n",
"Loading: 69139. 288 / 596\n",
"Loading: 69141. 289 / 596\n",
"Loading: 69142. 290 / 596\n",
"Loading: 69143. 291 / 596\n",
"Loading: 69144. 292 / 596\n",
"Loading: 69145. 293 / 596\n",
"Loading: 69146. 294 / 596\n",
"Loading: 69147. 295 / 596\n",
"Loading: 69148. 296 / 596\n",
"Loading: 69149. 297 / 596\n",
"Loading: 69151. 298 / 596\n",
"Loading: 69153. 299 / 596\n",
"Loading: 69154. 300 / 596\n",
"Loading: 69155. 301 / 596\n",
"Loading: 69156. 302 / 596\n",
"Loading: 69157. 303 / 596\n",
"Loading: 69158. 304 / 596\n",
"Loading: 69159. 305 / 596\n",
"Loading: 69160. 306 / 596\n",
"Loading: 69162. 307 / 596\n",
"Loading: 69164. 308 / 596\n",
"Loading: 69165. 309 / 596\n",
"Loading: 69166. 310 / 596\n",
"Loading: 69169. 311 / 596\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading: 69170. 312 / 596\n",
"Loading: 69171. 313 / 596\n",
"Loading: 69172. 314 / 596\n",
"Loading: 69173. 315 / 596\n",
"Loading: 69174. 316 / 596\n",
"Loading: 69175. 317 / 596\n",
"Loading: 69176. 318 / 596\n",
"Loading: 69177. 319 / 596\n",
"Loading: 69178. 320 / 596\n",
"Loading: 69179. 321 / 596\n",
"Loading: 69180. 322 / 596\n",
"Loading: 69181. 323 / 596\n",
"Loading: 69182. 324 / 596\n",
"Loading: 69183. 325 / 596\n",
"Loading: 69184. 326 / 596\n",
"Loading: 69185. 327 / 596\n",
"Loading: 69186. 328 / 596\n",
"Loading: 69187. 329 / 596\n",
"Loading: 69189. 330 / 596\n",
"Loading: 69195. 331 / 596\n",
"Loading: 69207. 332 / 596\n",
"Loading: 69209. 333 / 596\n",
"Loading: 69210. 334 / 596\n",
"Loading: 69211. 335 / 596\n",
"Loading: 69212. 336 / 596\n",
"Loading: 69213. 337 / 596\n",
"Loading: 69214. 338 / 596\n",
"Loading: 69215. 339 / 596\n",
"Loading: 69216. 340 / 596\n",
"Loading: 69217. 341 / 596\n",
"Loading: 69218. 342 / 596\n",
"Loading: 69219. 343 / 596\n",
"Loading: 69220. 344 / 596\n",
"Loading: 69221. 345 / 596\n",
"Loading: 69222. 346 / 596\n",
"Loading: 69223. 347 / 596\n",
"Loading: 69224. 348 / 596\n",
"Loading: 69225. 349 / 596\n",
"Loading: 69226. 350 / 596\n",
"Loading: 69227. 351 / 596\n",
"Loading: 69228. 352 / 596\n",
"Loading: 69229. 353 / 596\n",
"Loading: 69230. 354 / 596\n",
"Loading: 69231. 355 / 596\n",
"Loading: 69232. 356 / 596\n",
"Loading: 69233. 357 / 596\n",
"Loading: 69234. 358 / 596\n",
"Loading: 69235. 359 / 596\n",
"Loading: 69236. 360 / 596\n",
"Loading: 69237. 361 / 596\n",
"Loading: 69238. 362 / 596\n",
"Loading: 69239. 363 / 596\n",
"Loading: 69240. 364 / 596\n",
"Loading: 69241. 365 / 596\n",
"Loading: 69242. 366 / 596\n",
"Loading: 69243. 367 / 596\n",
"Loading: 69244. 368 / 596\n",
"Loading: 69245. 369 / 596\n",
"Loading: 69246. 370 / 596\n",
"Loading: 69247. 371 / 596\n",
"Loading: 69248. 372 / 596\n",
"Loading: 69249. 373 / 596\n",
"Loading: 69250. 374 / 596\n",
"Loading: 69251. 375 / 596\n",
"Loading: 69252. 376 / 596\n",
"Loading: 69253. 377 / 596\n",
"Loading: 69254. 378 / 596\n",
"Loading: 69255. 379 / 596\n",
"Loading: 69256. 380 / 596\n",
"Loading: 69257. 381 / 596\n",
"Loading: 69259. 382 / 596\n",
"Loading: 69260. 383 / 596\n",
"Loading: 69262. 384 / 596\n",
"Loading: 69263. 385 / 596\n",
"Loading: 69264. 386 / 596\n",
"Loading: 69265. 387 / 596\n",
"Loading: 69267. 388 / 596\n",
"Loading: 69268. 389 / 596\n",
"Loading: 69269. 390 / 596\n",
"Loading: 69270. 391 / 596\n",
"Loading: 69271. 392 / 596\n",
"Loading: 69272. 393 / 596\n",
"Loading: 69273. 394 / 596\n",
"Loading: 69274. 395 / 596\n",
"Loading: 69275. 396 / 596\n",
"Loading: 69276. 397 / 596\n",
"Loading: 69277. 398 / 596\n",
"Loading: 69278. 399 / 596\n",
"Loading: 69279. 400 / 596\n",
"Loading: 69280. 401 / 596\n",
"Loading: 69282. 402 / 596\n",
"Loading: 69283. 403 / 596\n",
"Loading: 69285. 404 / 596\n",
"Loading: 69286. 405 / 596\n",
"Loading: 69287. 406 / 596\n",
"Loading: 69288. 407 / 596\n",
"Loading: 69289. 408 / 596\n",
"Loading: 69291. 409 / 596\n",
"Loading: 69292. 410 / 596\n",
"Loading: 69293. 411 / 596\n",
"Loading: 69295. 412 / 596\n",
"Loading: 69296. 413 / 596\n",
"Loading: 69297. 414 / 596\n",
"Loading: 69298. 415 / 596\n",
"Loading: 69299. 416 / 596\n",
"Loading: 69300. 417 / 596\n",
"Loading: 69302. 418 / 596\n",
"Loading: 69303. 419 / 596\n",
"Loading: 69304. 420 / 596\n",
"Loading: 69305. 421 / 596\n",
"Loading: 69306. 422 / 596\n",
"Loading: 69307. 423 / 596\n",
"Loading: 69308. 424 / 596\n",
"Loading: 69312. 425 / 596\n",
"Loading: 69314. 426 / 596\n",
"Loading: 69315. 427 / 596\n",
"Loading: 69316. 428 / 596\n",
"Loading: 69318. 429 / 596\n",
"Loading: 69319. 430 / 596\n",
"Loading: 69320. 431 / 596\n",
"Loading: 69322. 432 / 596\n",
"Loading: 69323. 433 / 596\n",
"Loading: 69324. 434 / 596\n",
"Loading: 69325. 435 / 596\n",
"Loading: 69326. 436 / 596\n",
"Loading: 69327. 437 / 596\n",
"Loading: 69328. 438 / 596\n",
"Loading: 69329. 439 / 596\n",
"Loading: 69330. 440 / 596\n",
"Loading: 69331. 441 / 596\n",
"Loading: 69332. 442 / 596\n",
"Loading: 69333. 443 / 596\n",
"Loading: 69334. 444 / 596\n",
"Loading: 69335. 445 / 596\n",
"Loading: 69336. 446 / 596\n",
"Loading: 69337. 447 / 596\n",
"Loading: 69338. 448 / 596\n",
"Loading: 69340. 449 / 596\n",
"Loading: 69343. 450 / 596\n",
"Loading: 70219. 451 / 596\n",
"Loading: 70220. 452 / 596\n",
"Loading: 70221. 453 / 596\n",
"Loading: 70223. 454 / 596\n",
"Loading: 70224. 455 / 596\n",
"Loading: 70225. 456 / 596\n",
"Loading: 70256. 457 / 596\n",
"Loading: 70259. 458 / 596\n",
"Loading: 70260. 459 / 596\n",
"Loading: 70262. 460 / 596\n",
"Loading: 70263. 461 / 596\n",
"Loading: 70264. 462 / 596\n",
"Loading: 70270. 463 / 596\n",
"Loading: 70271. 464 / 596\n",
"Loading: 70272. 465 / 596\n",
"Loading: 70273. 466 / 596\n",
"Loading: 70275. 467 / 596\n",
"Loading: 70276. 468 / 596\n",
"Loading: 70277. 469 / 596\n",
"Loading: 70280. 470 / 596\n",
"Loading: 70281. 471 / 596\n",
"Loading: 70282. 472 / 596\n",
"Loading: 70283. 473 / 596\n",
"Loading: 70284. 474 / 596\n",
"Loading: 70286. 475 / 596\n",
"Loading: 70287. 476 / 596\n",
"Loading: 70288. 477 / 596\n",
"Loading: 70289. 478 / 596\n",
"Loading: 70291. 479 / 596\n",
"Loading: 70292. 480 / 596\n",
"Loading: 70293. 481 / 596\n",
"Loading: 70294. 482 / 596\n",
"Loading: 70295. 483 / 596\n",
"Loading: 70296. 484 / 596\n",
"Loading: 70297. 485 / 596\n",
"Loading: 70298. 486 / 596\n",
"Loading: 70300. 487 / 596\n",
"Loading: 70301. 488 / 596\n",
"Loading: 70302. 489 / 596\n",
"Loading: 70303. 490 / 596\n",
"Loading: 70304. 491 / 596\n",
"Loading: 70305. 492 / 596\n",
"Loading: 70306. 493 / 596\n",
"Loading: 70307. 494 / 596\n",
"Loading: 70308. 495 / 596\n",
"Loading: 70309. 496 / 596\n",
"Loading: 7525. 497 / 596\n",
"Loading: 7529. 498 / 596\n",
"Loading: 7530. 499 / 596\n",
"Loading: 7531. 500 / 596\n",
"Loading: 7532. 501 / 596\n",
"Loading: 7533. 502 / 596\n",
"Loading: 7534. 503 / 596\n",
"Loading: 7535. 504 / 596\n",
"Loading: 7536. 505 / 596\n",
"Loading: 7537. 506 / 596\n",
"Loading: 7538. 507 / 596\n",
"Loading: 7539. 508 / 596\n",
"Loading: 7540. 509 / 596\n",
"Loading: 7541. 510 / 596\n",
"Loading: 7542. 511 / 596\n",
"Loading: 7543. 512 / 596\n",
"Loading: 7544. 513 / 596\n",
"Loading: 7545. 514 / 596\n",
"Loading: 7546. 515 / 596\n",
"Loading: 7547. 516 / 596\n",
"Loading: 7548. 517 / 596\n",
"Loading: 7549. 518 / 596\n",
"Loading: 7550. 519 / 596\n",
"Loading: 7551. 520 / 596\n",
"Loading: 7552. 521 / 596\n",
"Loading: 7553. 522 / 596\n",
"Loading: 7554. 523 / 596\n",
"Loading: 7555. 524 / 596\n",
"Loading: 7556. 525 / 596\n",
"Loading: 7557. 526 / 596\n",
"Loading: 7558. 527 / 596\n",
"Loading: 7559. 528 / 596\n",
"Loading: 7560. 529 / 596\n",
"Loading: 7561. 530 / 596\n",
"Loading: 7562. 531 / 596\n",
"Loading: 7563. 532 / 596\n",
"Loading: 7564. 533 / 596\n",
"Loading: 7565. 534 / 596\n",
"Loading: 7566. 535 / 596\n",
"Loading: 7567. 536 / 596\n",
"Loading: 7568. 537 / 596\n",
"Loading: 7569. 538 / 596\n",
"Loading: 7570. 539 / 596\n",
"Loading: 7571. 540 / 596\n",
"Loading: 7572. 541 / 596\n",
"Loading: 7576. 542 / 596\n",
"Loading: 7577. 543 / 596\n",
"Loading: 7578. 544 / 596\n",
"Loading: 7579. 545 / 596\n",
"Loading: 7580. 546 / 596\n",
"Loading: 7581. 547 / 596\n",
"Loading: 7582. 548 / 596\n",
"Loading: 7583. 549 / 596\n",
"Loading: 7584. 550 / 596\n",
"Loading: 7585. 551 / 596\n",
"Loading: 7586. 552 / 596\n",
"Loading: 8649. 553 / 596\n",
"Loading: 8650. 554 / 596\n",
"Loading: 8651. 555 / 596\n",
"Loading: 8652. 556 / 596\n",
"Loading: 8655. 557 / 596\n",
"Loading: 8656. 558 / 596\n",
"Loading: 8657. 559 / 596\n",
"Loading: 8658. 560 / 596\n",
"Loading: 9575. 561 / 596\n",
"Loading: 9581. 562 / 596\n",
"Loading: 9592. 563 / 596\n",
"Loading: 9602. 564 / 596\n",
"Loading: 9609. 565 / 596\n",
"Loading: 9620. 566 / 596\n",
"Loading: 9636. 567 / 596\n",
"Loading: 9642. 568 / 596\n",
"Loading: 9650. 569 / 596\n",
"Loading: 9661. 570 / 596\n",
"Loading: 9673. 571 / 596\n",
"Loading: 9682. 572 / 596\n",
"Loading: 9695. 573 / 596\n",
"Loading: 9700. 574 / 596\n",
"Loading: 9717. 575 / 596\n",
"Loading: 9726. 576 / 596\n",
"Loading: 9736. 577 / 596\n",
"Loading: 9742. 578 / 596\n",
"Loading: 9754. 579 / 596\n",
"Loading: 9765. 580 / 596\n",
"Loading: 9774. 581 / 596\n",
"Loading: 9783. 582 / 596\n",
"Loading: 9794. 583 / 596\n",
"Loading: 9799. 584 / 596\n",
"Loading: 9811. 585 / 596\n",
"Loading: 9827. 586 / 596\n",
"Loading: 9837. 587 / 596\n",
"Loading: 9855. 588 / 596\n",
"Loading: 9860. 589 / 596\n",
"Loading: 9870. 590 / 596\n",
"Loading: 9880. 591 / 596\n",
"Loading: 9889. 592 / 596\n",
"Loading: 9912. 593 / 596\n",
"Loading: 9924. 594 / 596\n",
"Loading: 9928. 595 / 596\n",
"Loading: 9948. 596 / 596\n"
]
}
],
"source": [
"# For those matches, load the events and append to single dataframe\n",
"json_files = [int(x.split('.')[0]) for x in os.listdir('open-data-master/data/events') if 'json' in x]\n",
"male_matches = [x for x in json_files if x in male_matches]\n",
"df_list = []\n",
"count = 1\n",
"for event_json in male_matches:\n",
" print('Loading: {}. {} / {}'.format(event_json, count, len(male_matches)))\n",
" df = pd.read_json('open-data-master/data/events/' + str(event_json) + '.json')\n",
" df['match_id'] = event_json\n",
" df_list.append(df)\n",
" count +=1\n",
" \n",
"df = pd.concat(df_list)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Filter Shot Events"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:21:51.925176Z",
"start_time": "2021-01-03T11:21:42.970933Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" index | \n",
" period | \n",
" timestamp | \n",
" minute | \n",
" second | \n",
" type | \n",
" possession | \n",
" possession_team | \n",
" play_pattern | \n",
" ... | \n",
" match_id | \n",
" clearance | \n",
" off_camera | \n",
" miscontrol | \n",
" 50_50 | \n",
" out | \n",
" injury_stoppage | \n",
" half_start | \n",
" player_off | \n",
" half_end | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 65f16e50-7c5d-4293-b2fc-d20887a772f9 | \n",
" 148 | \n",
" 1 | \n",
" 2021-01-03 00:02:29.094 | \n",
" 2 | \n",
" 29 | \n",
" {'id': 16, 'name': 'Shot'} | \n",
" 6 | \n",
" {'id': 217, 'name': 'Barcelona'} | \n",
" {'id': 1, 'name': 'Regular Play'} | \n",
" ... | \n",
" 15946 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" b0f73423-3990-45ae-9dda-3512c2d1aff3 | \n",
" 283 | \n",
" 1 | \n",
" 2021-01-03 00:05:39.239 | \n",
" 5 | \n",
" 39 | \n",
" {'id': 16, 'name': 'Shot'} | \n",
" 11 | \n",
" {'id': 217, 'name': 'Barcelona'} | \n",
" {'id': 1, 'name': 'Regular Play'} | \n",
" ... | \n",
" 15946 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" 13b1ddab-d22e-43d9-bfe4-12632fea1a27 | \n",
" 755 | \n",
" 1 | \n",
" 2021-01-03 00:15:28.625 | \n",
" 15 | \n",
" 28 | \n",
" {'id': 16, 'name': 'Shot'} | \n",
" 26 | \n",
" {'id': 217, 'name': 'Barcelona'} | \n",
" {'id': 8, 'name': 'From Keeper'} | \n",
" ... | \n",
" 15946 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
3 rows × 43 columns
\n",
"
"
],
"text/plain": [
" id index period \\\n",
"0 65f16e50-7c5d-4293-b2fc-d20887a772f9 148 1 \n",
"1 b0f73423-3990-45ae-9dda-3512c2d1aff3 283 1 \n",
"2 13b1ddab-d22e-43d9-bfe4-12632fea1a27 755 1 \n",
"\n",
" timestamp minute second type \\\n",
"0 2021-01-03 00:02:29.094 2 29 {'id': 16, 'name': 'Shot'} \n",
"1 2021-01-03 00:05:39.239 5 39 {'id': 16, 'name': 'Shot'} \n",
"2 2021-01-03 00:15:28.625 15 28 {'id': 16, 'name': 'Shot'} \n",
"\n",
" possession possession_team \\\n",
"0 6 {'id': 217, 'name': 'Barcelona'} \n",
"1 11 {'id': 217, 'name': 'Barcelona'} \n",
"2 26 {'id': 217, 'name': 'Barcelona'} \n",
"\n",
" play_pattern ... match_id clearance off_camera \\\n",
"0 {'id': 1, 'name': 'Regular Play'} ... 15946 NaN NaN \n",
"1 {'id': 1, 'name': 'Regular Play'} ... 15946 NaN NaN \n",
"2 {'id': 8, 'name': 'From Keeper'} ... 15946 NaN NaN \n",
"\n",
" miscontrol 50_50 out injury_stoppage half_start player_off half_end \n",
"0 NaN NaN NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN NaN NaN \n",
"\n",
"[3 rows x 43 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"shots = df[~df['shot'].isnull()]\n",
"shots.reset_index(drop = True, inplace = True)\n",
"shots.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Expected Goal Features"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:30:41.273039Z",
"start_time": "2021-01-03T11:29:02.994836Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Ciaran\\AppData\\Roaming\\Python\\Python36\\site-packages\\ipykernel_launcher.py:36: RuntimeWarning: invalid value encountered in arccos\n"
]
}
],
"source": [
"left_post_x, left_post_y = (120, 36)\n",
"right_post_x, right_post_y = (120, 44)\n",
"\n",
"shots_model=pd.DataFrame(columns=['goal','x','y'])\n",
"for index, shot in shots.iterrows():\n",
" \n",
" # Keep only open play\n",
" open_play = 0\n",
" if shot['shot']['type']['name'] == 'Open Play':\n",
" open_play = 1\n",
"\n",
" # Keep non-headed shots\n",
" header = 0\n",
" if shot['shot']['body_part']['name'] == 'Head':\n",
" header = 1\n",
" if (open_play == 1) & (header == 0):\n",
" # Goal\n",
" if shot['shot']['outcome']['name'] == 'Goal':\n",
" shots_model.loc[index, 'goal'] = 1\n",
" else:\n",
" shots_model.loc[index, 'goal'] = 0\n",
"\n",
" # X, Y locations\n",
" shot_location_x, shot_location_y = shot['location']\n",
" shots_model.loc[index, 'x'] = shot_location_x\n",
" shots_model.loc[index, 'y'] = shot_location_y\n",
" shots_model.loc[index,'c']=abs(shot_location_y-40)\n",
"\n",
" # Distance to centre of goal and angle\n",
" shots_model.loc[index,'distance']=np.sqrt((120-shot_location_x)**2 + (40-shot_location_y)**2)\n",
" \n",
" # Angle to goal (radians)\n",
" a = np.sqrt((shot_location_x - right_post_x)**2 + (shot_location_y - right_post_y)**2)\n",
" b = np.sqrt((left_post_x - right_post_x)**2 + (left_post_y - right_post_y)**2)\n",
" c = np.sqrt((left_post_x - shot_location_x)**2 + (left_post_y - shot_location_y)**2)\n",
" angle_ac = np.arccos((a**2 + c**2 - b**2)/(2*a*c))\n",
" if angle_ac<0:\n",
" angle_ac=np.pi+angle_ac\n",
" shots_model.loc[index,'angle'] =angle_ac\n",
"\n",
" # Play pattern\n",
" shots_model.loc[index, 'play_pattern'] = shot['play_pattern']['name']\n",
"\n",
" # Body part\n",
" shots_model.loc[index, 'body_part'] = shot['shot']['body_part']['name']\n",
"\n",
" # First time\n",
" shots_model.loc[index, 'first_time'] = 0\n",
" if 'first_time' in shot['shot'].keys():\n",
" shots_model.loc[index, 'first_time'] = 1\n",
"\n",
" # Technique\n",
" shots_model.loc[index, 'technique'] = shot['shot']['technique']['name']\n",
"\n",
" # Pressure\n",
" shots_model.loc[index, 'under_pressure'] = 0\n",
" if shot['under_pressure'] == True:\n",
" shots_model.loc[index, 'under_pressure'] = 1\n",
"\n",
" # Freeze Frame - defender location\n",
" # Number of defenders between shot location and goal\n",
" # Distance to nearest defender between shot location and goal\n",
" # Distance to nearest defender\n",
" # TBD: position of nearest defender\n",
" # TBD: angle of goal left after removing blocked defenders (assume ~1m width)\n",
" \n",
" freeze_frame_def = []\n",
" for player in shot['shot']['freeze_frame']:\n",
" if player['teammate'] == False:\n",
" freeze_frame_def.append(player)\n",
" \n",
" distance_nearest_defender = None\n",
" distance_nearest_blocking_defender = None\n",
" blocking_defender = []\n",
" for defender in freeze_frame_def:\n",
" # defender specific\n",
" defender_x, defender_y = defender['location']\n",
" distance_defender = np.sqrt((shot_location_x - defender_x)**2 + (shot_location_y - defender_y)**2)\n",
" if distance_nearest_defender == None:\n",
" distance_nearest_defender = distance_defender\n",
" elif distance_defender < distance_nearest_defender:\n",
" distance_nearest_defender = distance_defender\n",
" distance_defender = None\n",
"\n",
" blocking = mfun.is_inside(shot_location_x, shot_location_y\n",
" , left_post_x, left_post_y\n",
" , right_post_x, right_post_y\n",
" , defender_x, defender_y)\n",
" if blocking == True:\n",
" # If defender is blocking part of the goal..\n",
" blocking_defender.append(defender)\n",
"\n",
" # Measure distance of blocking defender\n",
" distance_blocking_defender = np.sqrt((shot_location_x - defender_x)**2 + (shot_location_y - defender_y)**2)\n",
" \n",
" if distance_nearest_blocking_defender == None:\n",
" distance_nearest_blocking_defender = distance_blocking_defender\n",
" elif distance_blocking_defender < distance_nearest_blocking_defender:\n",
" distance_nearest_blocking_defender = distance_blocking_defender\n",
" distance_blocking_defender = None\n",
" \n",
" shots_model.loc[index, 'distance_nearest_defender'] = distance_nearest_defender\n",
" shots_model.loc[index, 'distance_nearest_blocking_defender'] = distance_nearest_blocking_defender\n",
" shots_model.loc[index, 'number_blocking_defenders'] = len(blocking_defender)\n",
"\n",
" \n",
" # Key Pass info\n",
" # TBD - get info from previous pass\n",
" # Eg. cross / through ball / where it was etc\n",
" \n",
" # StatsBomb xG\n",
" shots_model.loc[index, 'statsbomb_xg'] = shot['shot']['statsbomb_xg']\n",
"\n",
"# # Make locations numeric\n",
"# shots_model['x'] = pd.to_numeric(shots_model['x'])\n",
"# shots_model['y'] = pd.to_numeric(shots_model['y'])\n",
"\n",
"# # Try squared distances\n",
"# shots_model['d2'] = shots_model['distance']**2\n",
"# shots_model['x2'] = shots_model['x']**2\n",
"# shots_model['c2'] = shots_model['c']**2\n",
"# # Try angle * x location\n",
"# shots_model['ax'] = shots_model['angle']*shots_model['x']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save Preprocessed Data"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:30:41.336836Z",
"start_time": "2021-01-03T11:30:41.275002Z"
}
},
"outputs": [],
"source": [
"shots_model.to_pickle(\"./shots_model.pkl\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Expected Goals Model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load Shot Data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:30:41.524375Z",
"start_time": "2021-01-03T11:30:41.337833Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" goal | \n",
" x | \n",
" y | \n",
" c | \n",
" distance | \n",
" angle | \n",
" play_pattern | \n",
" body_part | \n",
" first_time | \n",
" technique | \n",
" under_pressure | \n",
" distance_nearest_defender | \n",
" distance_nearest_blocking_defender | \n",
" number_blocking_defenders | \n",
" statsbomb_xg | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 111.7 | \n",
" 51.7 | \n",
" 11.7 | \n",
" 14.345034 | \n",
" 0.336567 | \n",
" Regular Play | \n",
" Right Foot | \n",
" 1.0 | \n",
" Half Volley | \n",
" 0.0 | \n",
" 1.303840 | \n",
" NaN | \n",
" 0.0 | \n",
" 0.075164 | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" 114 | \n",
" 27 | \n",
" 13.0 | \n",
" 14.317821 | \n",
" 0.248710 | \n",
" Regular Play | \n",
" Left Foot | \n",
" 1.0 | \n",
" Volley | \n",
" 0.0 | \n",
" 3.700000 | \n",
" NaN | \n",
" 0.0 | \n",
" 0.062892 | \n",
"
\n",
" \n",
" 2 | \n",
" 0 | \n",
" 92 | \n",
" 34.5 | \n",
" 5.5 | \n",
" 28.535066 | \n",
" 0.273578 | \n",
" From Keeper | \n",
" Left Foot | \n",
" 0.0 | \n",
" Normal | \n",
" 0.0 | \n",
" 2.884441 | \n",
" 5.124451 | \n",
" 1.0 | \n",
" 0.020535 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 107 | \n",
" 25 | \n",
" 15.0 | \n",
" 19.849433 | \n",
" 0.268489 | \n",
" From Corner | \n",
" Right Foot | \n",
" 0.0 | \n",
" Normal | \n",
" 0.0 | \n",
" 3.244996 | \n",
" NaN | \n",
" 0.0 | \n",
" 0.035420 | \n",
"
\n",
" \n",
" 5 | \n",
" 0 | \n",
" 108.1 | \n",
" 27.4 | \n",
" 12.6 | \n",
" 17.331186 | \n",
" 0.323048 | \n",
" Regular Play | \n",
" Left Foot | \n",
" 1.0 | \n",
" Half Volley | \n",
" 1.0 | \n",
" 2.039608 | \n",
" NaN | \n",
" 0.0 | \n",
" 0.089920 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" goal x y c distance angle play_pattern body_part \\\n",
"0 0 111.7 51.7 11.7 14.345034 0.336567 Regular Play Right Foot \n",
"1 0 114 27 13.0 14.317821 0.248710 Regular Play Left Foot \n",
"2 0 92 34.5 5.5 28.535066 0.273578 From Keeper Left Foot \n",
"4 0 107 25 15.0 19.849433 0.268489 From Corner Right Foot \n",
"5 0 108.1 27.4 12.6 17.331186 0.323048 Regular Play Left Foot \n",
"\n",
" first_time technique under_pressure distance_nearest_defender \\\n",
"0 1.0 Half Volley 0.0 1.303840 \n",
"1 1.0 Volley 0.0 3.700000 \n",
"2 0.0 Normal 0.0 2.884441 \n",
"4 0.0 Normal 0.0 3.244996 \n",
"5 1.0 Half Volley 1.0 2.039608 \n",
"\n",
" distance_nearest_blocking_defender number_blocking_defenders statsbomb_xg \n",
"0 NaN 0.0 0.075164 \n",
"1 NaN 0.0 0.062892 \n",
"2 5.124451 1.0 0.020535 \n",
"4 NaN 0.0 0.035420 \n",
"5 NaN 0.0 0.089920 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load shot model\n",
"shots_model = pd.read_pickle(\"./shots_model.pkl\")\n",
"shots_model.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Separate into Train/Test Data"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:31:02.677856Z",
"start_time": "2021-01-03T11:31:02.655917Z"
}
},
"outputs": [],
"source": [
"# Separate into train, test data for modelling\n",
"X_train, X_test, y_train, y_test = train_test_split(shots_model.loc[:, shots_model.columns != 'goal']\n",
" , shots_model.loc[:, shots_model.columns == 'goal']\n",
" , test_size=0.2, random_state=42)\n",
"train = pd.concat([y_train, X_train], axis=1)\n",
"test = pd.concat([y_test, X_test], axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Logistic Regression"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:31:04.578548Z",
"start_time": "2021-01-03T11:31:04.503376Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Generalized Linear Model Regression Results \n",
"==================================================================================\n",
"Dep. Variable: ['goal[0]', 'goal[1]'] No. Observations: 9295\n",
"Model: GLM Df Residuals: 9290\n",
"Model Family: Binomial Df Model: 4\n",
"Link Function: logit Scale: 1.0000\n",
"Method: IRLS Log-Likelihood: -2901.7\n",
"Date: Sun, 03 Jan 2021 Deviance: 5803.5\n",
"Time: 11:31:04 Pearson chi2: 9.45e+03\n",
"No. Iterations: 6 \n",
"Covariance Type: nonrobust \n",
"=============================================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
"---------------------------------------------------------------------------------------------\n",
"Intercept 1.0519 0.205 5.122 0.000 0.649 1.454\n",
"distance 0.1080 0.008 12.989 0.000 0.092 0.124\n",
"angle -1.6109 0.181 -8.917 0.000 -1.965 -1.257\n",
"distance_nearest_defender -0.1242 0.021 -5.858 0.000 -0.166 -0.083\n",
"number_blocking_defenders 0.3260 0.054 6.053 0.000 0.220 0.432\n",
"=============================================================================================\n"
]
}
],
"source": [
"# A GLM for fitting goal probability\n",
"model_variables = ['distance'\n",
" , 'angle'\n",
" , 'distance_nearest_defender'\n",
" , 'number_blocking_defenders'\n",
" ]\n",
"\n",
"model=''\n",
"for v in model_variables[:-1]:\n",
" model = model + v + ' + '\n",
"model = model + model_variables[-1]\n",
"\n",
"#Fit the model\n",
"xG_model = smf.glm(formula=\"goal ~ \" + model, data=train, \n",
" family=sm.families.Binomial()).fit()\n",
"print(xG_model.summary()) \n",
"xG_model_params=xG_model.params"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Calculate xG on Test data"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:31:17.119822Z",
"start_time": "2021-01-03T11:31:17.111841Z"
}
},
"outputs": [],
"source": [
"# Calculate xG for GLM using each shot as input (row of shots_model)\n",
"def calculate_xG(sh):\n",
" # For the model 'b', get the intercept\n",
" bsum=xG_model_params[0]\n",
" # For as many variables as put in the model, \n",
" # multiply the coefficient by the value of that shot.\n",
" for i,v in enumerate(model_variables):\n",
" # bsum = intercept + (coefficient * variable value)\n",
" bsum=bsum+xG_model_params[i+1]*sh[v]\n",
" # Calculate probability of goal as 1 / 1 + exp(model output)\n",
" xG = 1/(1+np.exp(bsum)) \n",
" return xG "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:31:17.727181Z",
"start_time": "2021-01-03T11:31:17.368141Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" goal | \n",
" x | \n",
" y | \n",
" c | \n",
" distance | \n",
" angle | \n",
" play_pattern | \n",
" body_part | \n",
" first_time | \n",
" technique | \n",
" under_pressure | \n",
" distance_nearest_defender | \n",
" distance_nearest_blocking_defender | \n",
" number_blocking_defenders | \n",
" statsbomb_xg | \n",
" xG | \n",
"
\n",
" \n",
" \n",
" \n",
" 3580 | \n",
" 0 | \n",
" 102.5 | \n",
" 23.8 | \n",
" 16.2 | \n",
" 23.847222 | \n",
" 0.248088 | \n",
" From Corner | \n",
" Right Foot | \n",
" 0.0 | \n",
" Normal | \n",
" 0.0 | \n",
" 3.894868 | \n",
" NaN | \n",
" 0.0 | \n",
" 0.025732 | \n",
" 0.060465 | \n",
"
\n",
" \n",
" 13598 | \n",
" 0 | \n",
" 115 | \n",
" 26 | \n",
" 14.0 | \n",
" 14.866069 | \n",
" 0.192701 | \n",
" Regular Play | \n",
" Left Foot | \n",
" 1.0 | \n",
" Normal | \n",
" 0.0 | \n",
" 5.000000 | \n",
" 10.440307 | \n",
" 1.0 | \n",
" 0.052736 | \n",
" 0.113889 | \n",
"
\n",
" \n",
" 4691 | \n",
" 0 | \n",
" 102.1 | \n",
" 36.3 | \n",
" 3.7 | \n",
" 18.278403 | \n",
" 0.422998 | \n",
" Regular Play | \n",
" Left Foot | \n",
" 0.0 | \n",
" Half Volley | \n",
" 0.0 | \n",
" 1.664332 | \n",
" NaN | \n",
" 0.0 | \n",
" 0.047882 | \n",
" 0.105522 | \n",
"
\n",
" \n",
" 8187 | \n",
" 1 | \n",
" 112.5 | \n",
" 38.1 | \n",
" 1.9 | \n",
" 7.736924 | \n",
" 0.939567 | \n",
" From Goal Kick | \n",
" Right Foot | \n",
" 0.0 | \n",
" Normal | \n",
" 0.0 | \n",
" 1.204159 | \n",
" NaN | \n",
" 0.0 | \n",
" 0.354846 | \n",
" 0.444201 | \n",
"
\n",
" \n",
" 675 | \n",
" 0 | \n",
" 117.4 | \n",
" 30.7 | \n",
" 9.3 | \n",
" 9.656604 | \n",
" 0.263018 | \n",
" Regular Play | \n",
" Left Foot | \n",
" 0.0 | \n",
" Normal | \n",
" 0.0 | \n",
" 3.605551 | \n",
" NaN | \n",
" 0.0 | \n",
" 0.520617 | \n",
" 0.227407 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" goal x y c distance angle play_pattern \\\n",
"3580 0 102.5 23.8 16.2 23.847222 0.248088 From Corner \n",
"13598 0 115 26 14.0 14.866069 0.192701 Regular Play \n",
"4691 0 102.1 36.3 3.7 18.278403 0.422998 Regular Play \n",
"8187 1 112.5 38.1 1.9 7.736924 0.939567 From Goal Kick \n",
"675 0 117.4 30.7 9.3 9.656604 0.263018 Regular Play \n",
"\n",
" body_part first_time technique under_pressure \\\n",
"3580 Right Foot 0.0 Normal 0.0 \n",
"13598 Left Foot 1.0 Normal 0.0 \n",
"4691 Left Foot 0.0 Half Volley 0.0 \n",
"8187 Right Foot 0.0 Normal 0.0 \n",
"675 Left Foot 0.0 Normal 0.0 \n",
"\n",
" distance_nearest_defender distance_nearest_blocking_defender \\\n",
"3580 3.894868 NaN \n",
"13598 5.000000 10.440307 \n",
"4691 1.664332 NaN \n",
"8187 1.204159 NaN \n",
"675 3.605551 NaN \n",
"\n",
" number_blocking_defenders statsbomb_xg xG \n",
"3580 0.0 0.025732 0.060465 \n",
"13598 1.0 0.052736 0.113889 \n",
"4691 0.0 0.047882 0.105522 \n",
"8187 0.0 0.354846 0.444201 \n",
"675 0.0 0.520617 0.227407 "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Add an xG to my dataframe\n",
"train_xG=train.apply(calculate_xG, axis=1)\n",
"train['xG'] = train_xG\n",
"\n",
"test_xG=test.apply(calculate_xG, axis=1)\n",
"test['xG'] = test_xG\n",
"test.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model Evaluation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Check (x, y) Probabilities by # Defenders and Distance to Nearest Defenders"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"ExecuteTime": {
"end_time": "2021-01-03T11:31:25.195303Z",
"start_time": "2021-01-03T11:31:18.264550Z"
},
"scrolled": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAAD+CAYAAAAUNlNSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzt3XmcHFW5//HPM0tCFiBAwpIQdpBNCLsLSJDFgLJcRBBRQJGrKHBdfoLAlctyXcCriAjKKiAgoKAGjLLJosgWZBOUawhcCGFJgIRsJJnM8/ujakhP9eme6kr39OmZ7/v16lcy1adOnarurlPnPHVOmbsjIiJSq7ZmF0BERFqTKhARESlEFYiIiBSiCkRERApRBSIiIoWoAhERkUJUgYiISCGqQEREpBBVICIiUkhHswsgIiLlxluHv0P+mUJm032bu09qYJHKqAIREYnQYpxPMCJ3+p8yb3QDixOkCkREJFJtZvkTN2FaQ1UgIiIRMuIPUqsCERGJVFsNDRC1QERE5F1qgYiISM0Mqy0G0gSqQEREIqUWiIiI1MyoMQbSBKpAREQipRaIiIjUzAzaFQMREZEi1IUlIiKFxN6FFXv5JAczO9rM/tKkbW9gZm5mLXExUq/ypnlsUuG9+Wa20Yrk38rM7A9mdlSzy9HqkiC65X41w6CsQMzsBTNblP7Qe14/aWJ57jGzzzcw/yFmdrqZPWtmC8zs5fRHvk+jtllPZralmd2f/v8sMzuxStqjzWxZyec63cyO67/SgruPdPfp/blNAEucY2ZvpK9zzSqfWcxsdTO7xczmmtlMMzspxzY8/Q7NT7dxl5kdVprG3fd196ty5hWshCukX8fMJqdldTPbIO+6raqthlcztMRVY4Ps7+53NrsQ/eTXwDjgSOCxdNmHgY8CtzerUDXYAXi05P/f7SP9A+6+K4CZbQ/ca2YPuvtjfazX6v4dOAjYlmRiizuA6cDPKqT/BrASsA4wFNgy53a2dfdpZjYa2Bf4iZlt7u5nrkjhc+gG/kjy+f+1wdtqula4jXdQtkCqMbOfmtmvS/4+J73KMjObaGYzzOxUM5udtmSOKEk71Mz+x8xeNLPXzOxnZjas5P0DzexxM3vbzJ4zs0lm9m1gN5If4bstITPb3MzuMLM305bDoSX5rJFeib1tZg8DG1fZn72AvYED3f0hd1+Svv7o7v9Rkm6LtCU0x8yeNrMDSt77qJk9lm7vJTM7o8r2jk6v+ueZ2fOlx2cF7MjyCmQ74PG8K7r734B/AFuE3jezsemxfNPMppnZsSXvtaef9XPp/jxqZuMDeeyaHpc90r/fvbI2syvN7EIz+32ax0NmtnHJuvukn+9cM7vIzO614q3Ro4AfuPsMd38Z+AFwdJX0XcDr7r7Q3d9y9/tr2Zi7z3b3XwDHAaeY2RrQu0VtZpuk+zQ3/c3ckC6/L83mifR7f1hwI72395q7XwQ8Uks5W1nsLRBVIOW+DmyTngh3A44BjnL3nqnK1gZGk1zRHwVcYmbvSd87B9gMmABskqY5HcDMdgauJrnqGwV8CHjB3U8D/gwcn3Z9HG9mI0iuHq8D1gQOBy4ys63S7VwIvENy5fi59FXJXsBD7j6jUgIz6wRuIWmNrAmcAFxbsl8LSFovo0haLceZ2UGBfEYAPwb2dfeVgQ9Qw8k+kN8dZjYH+DJwgZm9DawFzDCzP+TMYyeSz2RqhSS/BGYAY4FDgO+Y2Z7pe18jOfb7AauQHOeFmfw/kubxcXe/u8I2DgfOBFYDpgHfTtcdTdI6PAVYA3iW5JhV2pdd0+NRyVbAEyV/P5Euq+Rh4HAzq/b9yeN3JL0ZOwfeO5vke7UasC5wAYC7fyh9f9v0e99Tscwxs11XsDwDRhuW+9Wc8g1ev02/rD2vYwHcfSHwaeCHwDXACYGT77fcfbG73wv8HjjUzAw4Fviqu7/p7vOA7wCfTNc5BrjC3e9w9253f9nd/1mhbB8jqVx+7u5d6VX0TcAhZtYOfBw43d0XuPvfgWr9zaOBV3v+sKTfe056RfhOuvh9wEjge2nr5E/ArSQnPtz9Hnd/Ki33kyQnzN0rbK8b2NrMhrn7K+7+dJWyVeXue5OclB5391WA7wHfdPdR7r5vlVXfl+7jfJKT5C+Af2UTpa2JXYGT3f0dd38cuAz4TJrk88B/uvuznnjC3d8oyeITwCXAfu7+cJXy3OzuD7t7F3AtyQUGJBXT0+5+c/rejyn5rALH4y/uPqrKdkYCc0v+nguMTL+b2X3fJC37ROCbZvbZdPlQM1tiZqtW2U62XEuB2cDqgbeXAusDY9NjXPVmj/SzbcoNIbHp6cLK+2qGwVyBHJR+WXtel/a8kZ4MppN8hjdm1nvL3ReU/P1/JFevY4DhwKM9lRJJf+2YNN144LmcZVsf2KW0ggOOIGn9jCG52nspU4ZK3iBpqfTs25vpSWgHkn5v0vK/5O7dmTzHAZjZLmZ2t5nNMrO5wBdJKqZe0uNyWPr+K2m3zeahQlnvGxjWC7x/fLrfTwBbpf8/G/jP9JisWWWfH0w/05Ekx2wrkso8ayzQU9mX7Td9f2ZfAW5096eqpIHelcJCkhN9z/bf/RzTVm7FlmIO80laSj1WAeaXtJ5LHQPc4e73AR8Bzk4rkfcBj7n73MA6QWkLdgzwZuDtk0h+Rw+nXaMr2toZVNSF1YLM7MskJ9eZJD+AUqulXTU91kvTzQYWAVuVVEqrpicxSE4UlWIV2R/4S8C9mQpupLsfB8wi6bsu7YsvOwGXuAvYyczWrZJmJjDezEq/D+sBL6f/vw6YDIx391VJgrLBax53vy1tOawD/BO4tEK6kSWvFwPv/ySt6O4lCfivD7ycHtNR7v56lf0pzec1ktbb/oG3ZwKrm9nKJctK97vaZwZJC+QgM/tKnrIEvELSrQMkd1GV/l3A0yQB9B7bpstCOki+R7j788Ak4FySFthZNW73wDSvslaYu7/q7se6+1jgCyRdsbnvvBrMzKDDLPerGVSBZJjZZsB/k3RjfQY4ycwmZJKdacmtsbuRdDf9Kr16vxQ4r+fq2MzGpX3kAJcDnzWzPc2sLX2v5+r8NaB03MCtwGZm9hkz60xfO5nZFu6+DLgZOMPMhpvZliSxmCB3vx24m6TLbpe03J0kV5o9HiKJc5yUbmsiyQn3+vT9lUmu1N9JYzmfqnDs1jKzA9IKdjHJFfGySmXLaVuSVsj2wN9qXTkN7P4bgROpu79EcjfPd81sJTPbhuTK/No0yWUkV+abWmKbnkBxaiawJ3CimX2p1rKRdH++18wOsmRcypdJWkxFXQ18Lf1ujSWJ511ZIe3NwGHpttuBt0mO88bkfDRR2h16BElM7pxM915Pmk+UXLy8lebd853Ifu/zbHMllrech6Z/D1jqworXLZlulN+kP+JrSH4MT7j7v4BTgV+YWc+X9lWSH8JMkhPNF0tiGSeTBEkfTAO+dwLvgXe7xT4LnEfSN30vyVU1wPkk8Y23zOzHaZfKPiTxk5npNs9h+Q/neJJukFdJThA/72NfDyaplK4B5gDPk3SJTUrLtgQ4gOSWzNnARcCRJfv1JeAsM5tHclNAtluvRxvJSWsmSXfG7um6haRdW2+mcantWX4nVl/e3/O5ktyBNYvkxoCQw4EN0jL/Bvgvd78jfe+HJPt6O8kJ9nJgWOnKaetpT+Bkq/HuKXefTdKKOZekq3FLkmD/4lB6M9st3adKLia5GeIp4O8kFdTFFbb9AMmFwH+RfJ9vA6aQxNd+aWbbVdnOE2k5ppHEib7q7qdXSLsT8FCafjLwH2mLB+AM4Kq0S/LQdB/npxdmlSwiuTCBpIW7qEraltbzSNuYu7As3D0qIemV+TXuviLdDCJBaRfiDOCIKnd0ySCxfnuHnzq82j0TvX1x/huPuvuODSxSmcHcAhFpOjP7iJmNSlu4p5JceD7Y5GJJBKyGW3ibdRvvYB6JLhKD95PcpDAEeIbk7sAB2y0jtYl9JLoqkBq4+z2s2F0yIr24+xkksQCRMpHXH6pARERi1ApzYTW1AjGze3bf7YO73/PH3zck/5a+QaCVyx400PYnK/Jfei0ifwpeNYFB9w0xcdJHAbjnvr80dIPNim3kpRaIiEiErInjO/JSBSIiEqn2ZhegD6pAREQi1PNEwpi1bAXS0vGNrGj3JdZyxahexyqCE0bo+xj5iaxHnvNCf8VJ6iH2krZsBSIiMtDFXoFoJLqISKSshlefeSVPQH3WkidvfjPw/nrpYxseM7MnzWy/vvJUBSIiEikzy/3qI592klmT9yWZtPPwdCbvUv9J8nyb7Ugmcr2or/K1RBdWS8c7oih7DGWok3oezyj7wovuX4P3Jc9xj/J4lgudT2KMi+RtWeS0MzDN3acDmNn1JM9xeaYkjbP8gWSrksxQXVVLVCAiIoNRHbuIxtH7KaYzgF0yac4AbjezE4ARwF59ZaouLBGRSJnlfwGjzWxqyevfS7MKZJ9tih0OXJk+rmI/kucgVa0j1AIREYmU1daJNbvK80Bm0Psx2OtS3kV1DMsfMvdA+rTH0UDFx0erBSIiEqFa7sDKUc08AmxqZhua2RCSIPnkTJqep2tiZlsAK5E8zbOiKFsgLRM0j6KcTS5DFMegoP4ue0MDtXn3pYFlyHs8IwxYxxpYb69TEdy9y8yOJ3l0cTtwhbs/bWZnAVPdfTLJ46gvNbOvknyhjvY+TsZRViAiImK1dmFV5e5TSJ57X7rs9JL/PwN8sJY8VYGIiESozrfxNoQqEBGRGFmUvX29qAIREYlU5PVHBBWIt0jQvN/L2M/bi/UzGEiB7qL7Utcy5SlDP49qj/QyO3teakZQXU8kFBGRmikGIiIihUXaOHuXKhARkUhFXn+oAgkaaPGORu5PrLGTohq5P0UvJ/t9JtzQ9vo5NhThpXevmEg/fe3rOQ6kEVSBiIhEyKjfSPRGUQUiIhKpyOsPVSAiIrFSF5aIiBQSYSioF1Ug0OBAcIsEyBseDB9gwfYyOX7pjRxI2PCZcPt5AGKLBNYbyYj/eRuqQEREIhV7lakKREQkUjE8k6QaVSAiIpGKu/pQBSIiEiXNhRWjVgmY17Ocdd3nQTaqPXcXQtGy1ylAnrecDR3V3uAR7C0yi2/dmKkLS0REimmPfCi6KhARkQgZ8TeyVIGIiMRIj7RtslYZHLci5Sy8bgRljyH/rOwvtp7bD54N6hQ7qedAwroO4mtgXGQQDDZUDERERAqJvP5QBSIiEiu1QEREpGYKoouISDEGbZHXIAOrAolxkGAUAwJzrBdFOYOZ1TGvftxc6Idf19l4s3kVHEjYlAGIBcueK+uBFViPvegDqwIRERkwNBJdREQKMMAifyCIKhARkRgZtLWpBSIiIgWoC6tRBlLAfIX2pYEB8v4e5R7FbLx1mvG18ADzgsH33MWMcQS7ZvGtJPaitm4FIiIygBm6jVdERIrQZIoiIlKUYiAiIlJI5PVHi1QgsU7LHmNwOm/eudLFGKDvZ/UMDOcKROfIOpRX7nLWaQR7xfzrtJ6mgddcWCIiUpAZpnEgIiJShFogIiJSMwPa1QKJTYzxjjrGLXKXs0551XN7ubJp9CNmi5ahTj/0ohPaNnp7oYT1mtk3hll8I1XPu7DMbBJwPtAOXObu3wukORQ4g+RgP+Hun6qW5yCsQEREWkAdx4GYWTtwIbA3MAN4xMwmu/szJWk2BU4BPujub5nZmn3lG/lcjyIig5eZ5X71YWdgmrtPd/clwPXAgZk0xwIXuvtbAO7+el+ZqgIREYmUWf5XH8YBL5X8PSNdVmozYDMzu9/MHky7vKpSF5aISISScSA19WGNNrOpJX9f4u6XlGSXlQ0qdQCbAhOBdYE/m9nW7j6n0gbjrEDqFijt78eh9nMAO3e6QJq6BcgL5l14e3XUyMe0BmfVzZlVHmXZ55zFt24DEAPb7PcBenW8mSHGwYVW8wOlZrv7jhXemwGML/l7XWBmIM2D7r4UeN7MniWpUB6ptEF1YYmIRCl//CNHS+URYFMz29DMhgCfBCZn0vwW2APAzEaTdGlNr5ZpnC0QERGBOo0DcfcuMzseuI3kNt4r3P1pMzsLmOruk9P39jGzZ4BlwDfc/Y1q+aoCERGJVR270dx9CjAls+z0kv878LX0lYsqEBGRGBlYe9xRhjgqkNhmZo11RttguoL51ytA3owbB/pV3ivAggHlsqB2zs31sfmKygZzFwy+hzMrlleMAWyI4FG48T9RKo4KREREejFDs/GKiEhBaoGINJ6789acucyY+Qrz5i9gaddS3J2Ojg6GDhnCWmNGM3bttejs7Gx2UUVyUwukXzVwAFsUs+rWM96RI5ZRz4GEuQ5NvuM3Y+Yr3P/wo/z1kUf5x//+i5dmvsJLL79CZ2cH666zDiuPHEFnZwdt1kbXsi4WL17CK6/P4vXZb7Dm6DUYP3YdNlxvPDtvP4EP7rwD2261ReWKJfcTAgsOJMzTz150UGKeuEVwLF7RuEjOgX159jnGJxk2g1ogIitm4cJF3HL7Xdxy25385eFHWLBwER/caQc+sPMO7LvnRMavszbjx63DKiuvXDkTM5YuXcorr73Oiy/P5LkXXuSBqX/j8mtv4IWXZrDThG348G4f4JP/tj8bb7B+/+2cSCVmdRsH0iiqQCRKXV1d3PXn+7nupt8x+bY72Xm7bTlk//341tdPYLONN+o98jbn3VudnZ2st+441lt3HLvushNHHfZxAN6aM5cHpv6NKXfdzQf2+zgbb7Aehx98AIcd+DHWHDOmEbsnkks9nwfSCKpAJCpvz5vHxVdfx48vvZJxa6/F4QcfyDmnn8LaazbuRL7aqFXZb6892G+vPTjvrG9x5333c91Nv+X0c37Ivh+eyEknfJEJW2/VsO2LVKQWiEjf3p43jx9dcgUXXHYl++y+G7dcczkTtt6y38vR2dnJvntOZN89J6aV2S/52BGfY8LWW3LmSV9jh23f2+9lkkEqmY632aWoKu5hjlV54JVnNS9/NVKe7QXTBF6hdHleRfMKrtfd+9Xt5a9gft3BV/eyLi79xXVsustEnpv+PA/c+muuveg8Jmy5OXR3Q/eyvl8V8u5dzhz5dC9Lt5m8Vhkxgm986Viee+geJu2xOwce+Xk+9YXjefW11wLbyOxv8LgEylX0M+7v70ae73ajFd5egfNEX9vvJ9aW/9UMLVyBSKt7dtp0Pnzwp7j82hu441dXc9UFP2CTDTdodrHKDB06lOOPOZL//eufWH/8umy7x35cft0NeJQj52XAMMPa23K/mkEViPS7pUuX8u0fXciuB3yCgz82iftv/TXbbLlFs4vVp+HDh/Hd007ithuu4uKrrmPPjx/Bv6Y/3+xiyUBWx0cSNoIqEOlXs2a/wT6HHsl9DzzE1Nsmc+Lnj6a9vb3ZxarJhK235IEpN7P/R/Zk1/0P5Zbb72p2kWSgarP8r2YUrylblUHp8b8/w86TDuL9O23PlOt+zvrjs49kbh3t7e189QvH8LurL+FLJ3+L//7hBerSkrpKGhZ1e6BUQ+gurJB6zjCbK/8KgfVG5uXdOdIEN5hjvfJlN06+leNPOYOffPcMDj3go2mgeVnfeeUrVEE5fnR9jAx/3/bb8tCUmzjk88fzxN+f4efnn8vIESMqdCnkGFGeHdFesZxtfScJZpVnJtyCeQXl2J9YZ+ONQeS38aoFIg136TXX840zv8ftN1yVVB4DzNi11+Lum65lxPBh7PepzzF/wYJmF0kGhBriH4qByEB0xS9/xdnnXcBdv/5FU8Z19JehQ4dyxY/OZbNNNmL/zxzLggULm10kGQBi78JSBSINc/WNN3P6uedx543XRHl7br21tbVxyfe/zQbjx3HgUceycOGiZhdJWpkRfRC9RWIg9Yo1rEi6HGkKz2ibU55Zdbu7+04TyiuYdSCvPDGX7m5unnIbp3zn+9x5w5VstuF6+eIdRWcgrqvMDzHv7Ljpem3AZf/zbY7+yskccsxxTL76Ejo60p+Z58krdE0XOgbZGFYg77ZAXmUxlxU48eT5bkf5JMO8cabmi30uLLVApO6eePoffPHk07nlqovZYtNNml2cftfe3s7Pzz+X7u5uTjr7nGYXR1qZWiCy/Yf2Ytbs2Wyy0YYNyD1HSyLPegWThBK98NLLjFljdb5+5nerrNbCt7zmvCrs6lrGVTfezCOPP0lHezv57vrKXYiCSRp3opk2/QXGjF6Dv/1Z42LqIh2JHjNVIP1g1uw3mD+IgqobtPD4jnrq6Ghnmy3e0+xi9BvdfdYAkXdhqQLpB5tstAEAd9/6mwop8o7daJ0YSJ9lGKAxkHeF4g+hq/9sXqFZ8YKbyy7MGQOp1/4F7PGxg/tMIzXoCaJHTBXIiijcDVPHk2nhgYR5ypCjAslVyQTSFd3nenZ95TlR5j3BZk/83aEAb85KpUiaYKw6xyDBvIezaHeYBgmukNiD6KpARESi1LzgeF6qQEREYqUWiIiI1KwFnkioCkREJFaqQIpo4MjzRs742mcQ2MPpCgfMKR6czhMMD91NVba9nOtl9seDeRfdv9D2MoJ3N+UIhgfSWPDupkwZQmlCM+1mtxe87b/oWIDQcSk4i29I2bqB73c9R4ZnP/tQ3gNqdLpVuJMuHpFWICIiohaIiIjUzlALREREilAX1sDR0Jl2V2Akeq4R5TniBnniHcH1lgWShOIiy6r/DbAssCzXwMWiMZAccZHA89q9LfAM98wyC30Qec4FOcIW4YV5gxl5ZtDNvbBvhWfsFSD6Y6MKREQkRrqNV0REClMFIiIitVMMREREilILpEHq+kCifgqGe2CgVcWs8g4uzP6dM8icDXQH04SC2r3TeSjwvWxpYNmyvtPkCaw3eiBhNmi+LBAwb+/scz0PpAnHpjP7k2OMYsV0eRJltxccP5dzgF5FpWmLngDrOLAvzwDEGCkGIiIihUVegcTdwSYiMkgZhrW15X71mZ/ZJDN71symmdk3q6Q7xMzczHbsK09VICIisTLL/6qajbUDFwL7AlsCh5vZloF0KwMnAg/lKZ4qEBGRGPVMZZL3Vd3OwDR3n+7uS4DrgQMD6c4GzgXeyVPESGIgDXruddFAe10D9MENFNte0Ue+5prlNl/wvSxo3rWkfL2uUBA9s6yrK5B3juB7jsB+V9cy/vrUP5m/6B22f89GrL3GauEgemhEeSYYbqGAeUegDN2ZdIFj7gwpW2bZInjOPu+ydKFZaHPcdJF7RttQIerUP190Bt2GPy637GDVMe88ar6Nd7SZTS35+xJ3vyT9/zjgpZL3ZgC79Nqa2XbAeHe/1cz+X54NRlKBiNTHTX/6K8ed+1OWdi3DzFi8dCkHfWgXLjvtRIatNLTZxROpTW0V4mx3rxS3qDrXjZm1AecBR9eyQXVhyYDxp6lPcvTZ5/Pm2/OZt3ARby9YyOIlS/ndfQ9z2GnnNrt4IrXpuY23DjEQkhbH+JK/1wVmlvy9MrA1cI+ZvQC8D5jcVyBdFYgMGKf97BoWLS7vUntnyRLufvRJ/vnCjCaUSmQF1K8CeQTY1Mw2NLMhwCeByT1vuvtcdx/t7hu4+wbAg8AB7j41nF1CXVh51SP+UCnWUXTQYHDdnAPt8syqGxokmI15hOIdS8tP4p5dL5CGpYvLly3LxEoqxEC6u7uZ+o9p5e+VuPvBR9h8rVHLFwRjIL1/Et4Z6PbqDsQyOorFzbJxkeBpIHSZl42B5B78l10v5/eslsGFvZ5IGHo/R95CPacycfcuMzseuA1oB65w96fN7CxgqrtPrp5DmCoQGRDMjDYzllU4qRnQGZiaXSRqdaxc3X0KMCWz7PQKaSfmyVNdWDIgmBl777B1xd/bMnf23WVC/xZKZEXUNwbSEKpAZMD4zrGHMXxoeZfT8JWG8rl9JzJuzOpNKJVIUVbPcSANoQpEBoxtNl6Pu354KjtstiFDOzsYsdJQVh0xnFOPOIDzTziy2cUTqV3kLZBBGANp9CDBPEWo52y8mWXBR9P2vV7wMbShgX3ZoHkoYL40MIh1SWbZkkDAPJsmlH8osF8SWN9x7Bo8dO7XeW3O28xftJj1xqxOZ0c7LJxfvl4oJtKZCZBng/gA3YFBkJmbEnL/nDM/fA+cCCw0CDLP594WKEU9Z+Pt95NWswf29bOekegRG4QViAwGa41ahdIbrkRajx4oJSIiRUV+i7MqEBGRWKkCERGRmhnhSUAj0hoVyArNVtvA9fJlzvLgX6XtFB11HkpYv5HowYB1JrCeK2AO8M7CzN+LytMsDi3L5BWYxTdY9qzQqPOOwNd/6Ep12V4wNh0KamfL1RbIO9QPnmckep6bLkKB6NDo9FxDyuuokTP0RnFDQB4WvhEiIq1RgYiIDEZqgYiISCFRtoyWUwUiIhIj0228IiJSlFogLaho0L7Rwf6iZcizLBhED41EzwSVQ9Oyh4Lh2aB5aGR4aNmiTPA9G1Snwij6DAtdyWUD5gDDhvf+O3QjQXAD2aB2YHuhx+O2ZY5xaHS8B36mRT/3ouejWgLPfX2/Wzao3YRZLEI3f0REFYiISIzUhSUiIoVF2RJbThWIiEisdBvvQNHPg6ZyDQIrnn9Z3CCUd/Axt5k++9AjbUMz7WbjG/PmlqcJLPMFC3ovWBSIr4QG+2XzCQ0aHDasbJGNyMRY8sZAsrGLjvLH3pYdO8C6M+XK81lRYYbePLL5r8gjbQsbZLPqFmUaSCgiIkWpBSIiIoUoBiIiIrUztUBERKQAQzEQabCigxmzQoPxugN5ZYPKoUe+Bgb7lQ0kDAXM58wpX29u73Q+r3ywYfc7gUB+RttK5YP4bOWR5WVY2juv4M83+9hbgCFDe/89tDxAH34cb45H0+bR6IGE0hzqwhIRkULUhSUDyY6fPpFZc+ay8fh1kgXBZ4YEWiXZW21Dt/+GbsfNLgtsz0MtpYzgszhC04Zkb/cN3f7bEZiSJJuuPbBeYHvPzXiVMauN4tEbLy1PL4ObWfg7GhFVIFKTWXPmMj80FkMKmb9Qx1KqUBeWDCQ9LY8//fR7yYJFgQkQ334rsCwT33hzVlmSGGIgrLpq7zSjRpWnWX1M+bJVMunp1loLAAAGUklEQVRWWa08zbDy7e355dPK04kAugur6Zowe2ZZEbzkibYRlGdFZfclOItvKLDe+0rb5wcqnjfeKFu09PXelcrS2fPK0nQvDMwInNE2vDzw3Tm6PNjfmekyC41gtxGBimdZZlnouAyEz79U0/cn76N3W5TuwhIRkcLUAhERkUIUAxERkdrpeSAyGIUGwy3OzNC7cGFZkq5Z5YMLF70wu9ffs98oj1ssWtT3bLzDhpV/1UfPL5812DJXfB0rr1yeWXZfoPgAQJFKDLVARESkIMVARESkdqYWiIiIFGBoJLqIiBShgYRitnxsU09ztJ4DsLJN3FCTN8+y0N0eoUFMPeu1tff+t6+8st4pD4Yvef3tsmUvv9x7wOGrgUGDC5b1HcAesai8TIvfKQ++rz+i96y6HWMDMwuHZPc5eFyqXE32HOvQscvz+eX93Osp8u6VASHyY6wKREQkVmqBiIhIzcw0lYmIiBQUeQsk7tKJiAxmZvlffWZlk8zsWTObZmbfDLz/NTN7xsyeNLO7zGz9vvJUCyQGoQ8/ONFojnQ5g6nZEdceCvC2B6ZAT9ez9LGuHnq8a+hxritllgXKtODN8tHpL2ZGi//f4vKp2+d29R1EX7Wj/FppSeBBVKMzZRgeOp7ZfYHyfQ4cFwsdq54rzJ5jHfgcsp9VurD635WUpWtC8F1yqt9dWGbWDlwI7A3MAB4xs8nu/kxJsseAHd19oZkdB5wLHFYtX7VAREQiZWa5X33YGZjm7tPdfQlwPXBgaQJ3v9vde66gHgTW7StTtUBERGJk1NoCGW1mU0v+vsTdL0n/Pw54qeS9GcAuVfI6BvhDXxtUBSIiEqWan4k+2913rJxZmeCANDP7NLAjsHtfGxzgFUjuY9a/ss3NFRlYWJZXaJ9D/dw5Br51lMdA3h3oNiQdcNc9ojxNaGbarkzsYmz5Y2+Xdj1RtuzvC3vHQGYsLh/8tzTH8etcUn4M5gUGIL43G08ZO7Y8s9FrlS9bOfNI25UCx2XI0PJlPZ9fR+UYSPgqtJ9jGVHGRWIsU53V7y6sGcD4kr/XBWaWbc5sL+A0YHd3D0w73ZtiICIiMeqZzr0+d2E9AmxqZhua2RDgk8DkXpsz2w64GDjA3V/PU8QB3gIREWlV9bsLy927zOx44DagHbjC3Z82s7OAqe4+Gfg+MBL4VRqUf9HdD6iWryoQEZFY1bHr0N2nAFMyy04v+f9eteZpXs+J/WrduNmMVVdZZdyEbd7bR8oGl7Es+3puz3n8qacBmPDerWrIPpAouF6OzIKfsfedJrDs8X88C8CEzTdNkoTiHcsCTwjMxkAWlY/5WPLqmwC8sDiZMHGDoUN4bWHv9RYHyuQ5joEF+suHBn6caw1fHvd5YfES2kesxMajM/GNYcPLN5CNF7WXX5tZYKLEx/85DYAJW2yWJsobyyg4DqRsvRxpcmb1+FPJkIJ3v+crJHQc6pBtLdur4vEnn2LkyBHMeHlmw0q145ab+cPXXpQ7ffv2ez9aJYjeEM2uQB4DxgDTmlaI/rFJ+u9A2M/+2JcJ6b+PN3AbefRHOfTdaE2bALPcfbtGbWDHrd7jD19XQwUyYa9+r0Ca2oXVyIMvrcvM7gFw94kqhwxqkc+FpRiIiEisorx9ejlVICIiUdITCUVEpCi1QEREpGZmwbv5YhJ36UREBrEcs+w2lSoQEZFYKQYiIiI165kLK2KqQCRGmwAje8ZhNNEEYH6TyyCDlu7CEiliVrMLkJpPPGWRwUgtEJHaaIYCkVRg/rSYqAIREYlRvud8NJUqEBGRWCkGIiIihagFIiIitdNdWCIiUpRaICIiUowqEBERqZVGoouISGFx1x+qQERE4hV3DaIKREQkShpIKCIiRakCERGRYlSBiIhIEWqBiIhIzUwj0UVEpCi1QEREpBhVICIiUoCpBSIiIoWoAhERkdoZ6sISEZFi1AIREZGaaTZeEREpThWIiIgUoRaIiIgUEnf9oQpERCROmspERESKUBBdRESKUwUiIiJFqAUiIiK10yNtRUSksLgrEHP3ZpdBREQyzOyPwOgaVpnt7pMaVZ4QVSAiIlJI3DcZi4hItFSBiIhIIapARESkEFUgIiJSiCoQEREpRBWIiIgUogpEREQKUQUiIiKFqAIREZFC/j/PqMxKPXqsFQAAAABJRU5ErkJggg==\n",
"text/plain": [
"