Let's import some libraries first...

In [1]:
import pandas
from pandas.plotting import scatter_matrix

from sklearn import datasets
from sklearn import model_selection
from sklearn import linear_model

# models
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

import matplotlib.pyplot as plt

Load and examine dataset...

In [2]:
# Read the half-year consumer-lending figures into a DataFrame and print the
# whole table so every period is visible in the cell output.
dataset = pandas.read_csv(
    filepath_or_buffer="data/ccp-consumer-lending-half-year.csv",
)
print(dataset)

   period   npbt  avg_gross_loan_book  net_lending  revenue
0  1HFY14  -2830              27424.5        17511     7000
1  2HFY14   -692              49041.0        31619    12104
2  1HFY15   2907              67541.0        16220    16603
3  2HFY15  -1400              85802.0        34800    19223
4  1HFY16   2053             110302.0        31900    26204
5  2HFY16   6100             127976.5        23200    26214
6  1HFY17   4513             145476.5        30391    29735
7  2HFY17  13083             158238.0        15793    36639
8  1HFY18   8292             166238.0        24394    37566
9  2HFY18  14736             177548.0        28011    41770


In [46]:
array = dataset.values
num_data_points = 3

# Slide a window of `num_data_points` consecutive periods over the table and
# fit an independent linear regression on each window, printing the fitted
# equation for every window.
# NOTE(review): with 3 observations, 2 features and (assuming sklearn's default
# fit_intercept=True) an intercept, each fit has as many parameters as data
# points, so the coefficients swing widely from window to window.
window_count = len(array) - num_data_points + 1
for begin in range(window_count):
    end = begin + num_data_points - 1   # index of the last period in the window (inclusive)
    window = array[begin:end + 1]

    X = window[:, 2:4]   # features: avg_gross_loan_book, net_lending
    Y = window[:, 1]     # target: npbt (net profit before tax)

    # `model` is rebound on every pass; after the loop it holds the fit for
    # the final window only.
    model = LinearRegression().fit(X, Y)

    coef_loan_book, coef_net_lending = model.coef_
    print("Period %s to %s: " % (window[0, 0], window[-1, 0]), end =" ")
    print("p = %sbr + %sl + %s" % (coef_loan_book, coef_net_lending, model.intercept_))

Period 1HFY14 to 1HFY15:  p = 0.140936039634br + -0.064399199089l + -5567.40604369
Period 2HFY14 to 2HFY15:  p = 0.000873579448887br + -0.232666977089l + 6621.85593883
Period 1HFY15 to 1HFY16:  p = 0.101672186835br + -0.331734973293l + 1420.70009581
Period 2HFY15 to 2HFY16:  p = 0.113066609345br + -0.235471748635l + -2906.92436252
Period 1HFY16 to 1HFY17:  p = 0.0547529021572br + -0.353939061014l + 7304.3014326
Period 2HFY16 to 2HFY17:  p = 0.110760929767br + -0.490240059925l + 3298.77326195
Period 1HFY17 to 1HFY18:  p = 0.0166480875203br + -0.572513045014l + 19490.3384469
Period 2HFY17 to 2HFY18:  p = 1.0645668191br + -1.54720783081l + -130936.871049


Where, in the fitted equations above (`br` denotes `b × r`):
```
p = Net profit before tax (NPBT).
r = Reporting period. Full year = 2, half year = 1.
b = Average gross loan book.
l = Net lending for the period.
```

## FY19 Predictions

Assumptions:

* Average gross loan book will be $196m.

* Net lending will be $50m, at the upper end of the forecast range. Using a high number here actually reduces the predicted EBIT, because the fitted coefficient on net lending is negative.


In [47]:
# Inputs for the FY19 projection, in the same units as the dataset columns
# ($196m and $50m expressed in thousands).
b = 196000   # average gross loan book
l = 50000    # net lending for the period
r = 2        # reporting period: full year = 2, half year = 1

# NOTE(review): `model` is not defined in this cell. As the notebook is laid
# out, it is the regression left over from the last window of the fitting
# loop (2HFY17 to 2HFY18) - confirm this is the intended model.
# Only the loan-book term is scaled by `r`; the net-lending term and the
# intercept are used as fitted.
loan_book_term = model.coef_[0] * b * r
net_lending_term = model.coef_[1] * l
p = loan_book_term + net_lending_term + model.intercept_

# NOTE(review): the model was fit against the npbt column, yet the result is
# labelled EBIT here - confirm which figure is meant.
print("EBIT = %s" % p)
# 0.7 presumably applies a 30% tax rate to get to NPAT - TODO confirm.
print("NPAT = %s" % (p * 0.7))

EBIT = 209012.930498
NPAT = 146309.051349
