Basic regression with Keras: simulated data
Linear regression with Keras on simulated data
In this series we will be working with a simple CSV data file whose structure is:
| indf | ndvi | dnbr |
| --- | --- | --- |
Basically, these data are simulated: $ndvi \sim \mathcal{U}[0,1]$ is drawn at random and $dnbr = 3\,ndvi + 1 + \epsilon$ with $\epsilon \sim \mathcal{N}(0, 0.1)$.
As for indf, it is our primary key column. Three shuffled files of indf values let us split the data into training, validation and test sets.
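For reference, here is a minimal sketch of how such a file could be produced (the file names match those used below, but the sample size and the train/validation/test split are purely illustrative; this is not the original generation script):
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
n = 5000                                              # illustrative size only
ndvi = rng.uniform(0.0, 1.0, n)                       # ndvi ~ U[0, 1]
dnbr = 3.0 * ndvi + 1.0 + rng.normal(0.0, 0.1, n)     # dnbr = 3*ndvi + 1 + eps
df = pd.DataFrame({"indf": np.arange(n), "ndvi": ndvi, "dnbr": dnbr})
df.to_csv("test_data.csv", index=False)
# Shuffle the primary keys and split them into the three id files.
ids = df["indf"].sample(frac=1, random_state=0)
ids.iloc[:3000].to_frame().to_csv("training.csv", index=False)
ids.iloc[3000:4000].to_frame().to_csv("validation.csv", index=False)
ids.iloc[4000:].to_frame().to_csv("test.csv", index=False)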
The next few lines are just a tip for centering the final layout.
from IPython.core.display import HTML as Center
Center(""" <style>
.output_png {
    display: table-cell;
    text-align: center;
    vertical-align: middle;
    height: 1600px;
    width: 600px;
}
</style> """)
Let’s begin by loading the necessary libraries:
import os
import re
import random
from matplotlib import pyplot as plt
from numpy.polynomial.polynomial import polyfit
from scipy.stats import gaussian_kde
import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import layers
from generator_from_one_file import DataGenerator
Then we can prepare the data based on the three files mentioned earlier:
data_file = "test_data.csv"
training_ids_file="training.csv"
#training_ids_file=data_path+"train.csv"
test_ids_file="test.csv"
#test_ids_file=data_path+"extra.csv"
validation_ids_file="validation.csv"
#validation_ids_file=data_path+"val.csv"
We make heavy use of generators here (see generator_from_one_file.py; a minimal sketch of such a generator is given right after the next snippet):
batch_size=1
training=DataGenerator(training_ids_file,data_file,batch_size)
training_count=training.get_infos()
test=DataGenerator(test_ids_file,data_file,batch_size)
test_count=test.get_infos()
validation=DataGenerator(validation_ids_file,data_file,batch_size)
validation_count=validation.get_infos()
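The class itself lives in generator_from_one_file.py and is not reproduced in this post; here is a minimal sketch of what such a generator could look like, assuming it is a keras.utils.Sequence that joins an id file with the data file and yields (ndvi, dnbr) batches, and that get_infos() simply returns the number of available samples (both are assumptions based on how the object is used above):
import numpy as np
import pandas as pd
from tensorflow import keras

class DataGenerator(keras.utils.Sequence):
    """Sketch of a Sequence yielding (ndvi, dnbr) batches for a given id file."""

    def __init__(self, ids_file, data_file, batch_size):
        ids = pd.read_csv(ids_file)
        data = pd.read_csv(data_file)
        # Keep only the rows whose primary key appears in the id file.
        data = data[data["indf"].isin(ids["indf"])]
        self.x = data["ndvi"].to_numpy(dtype="float32")
        self.y = data["dnbr"].to_numpy(dtype="float32")
        self.batch_size = batch_size

    def get_infos(self):
        # Number of samples seen by this generator.
        return len(self.x)

    def __len__(self):
        return len(self.x) // self.batch_size

    def __getitem__(self, idx):
        sl = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        return self.x[sl].reshape(-1, 1), self.y[sl].reshape(-1, 1)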
Time now to implement the network: a very simple one, a single layer with a single neuron and a linear activation (i.e. no non-linearity):
def build_model():
    model = keras.Sequential([
        layers.Input(shape=(1,)),
        layers.Dense(1, activation="linear"),
    ])
    opt = keras.optimizers.Adam()
    model.compile(optimizer=opt, loss="mse", metrics=["mae"])
    return model
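Before training, a quick sanity check can confirm that this single linear neuron computes exactly $\hat y = w\,x + b$ (the weight values below are set by hand, purely for illustration):
# Sanity check: a one-neuron Dense layer with linear activation is y_hat = w*x + b.
m = build_model()
m.layers[0].set_weights([np.array([[3.0]]), np.array([1.0])])  # w = 3, b = 1
print(m.predict(np.array([[0.5]])))  # expected: [[2.5]]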
We write a simple callback that fills a dictionary with the weight and the bias at the end of each epoch:
class GetWeights(keras.callbacks.Callback):
    def __init__(self):
        super(GetWeights, self).__init__()
        self.weight_dict = {}

    def on_epoch_end(self, epoch, logs=None):
        w = self.model.layers[0].get_weights()[0]
        b = self.model.layers[0].get_weights()[1]
        if epoch == 0:
            # create arrays to hold weights and biases
            self.weight_dict['w_'] = w
            self.weight_dict['b_'] = b
        else:
            # append the new weight to the previously-created weights array
            self.weight_dict['w_'] = np.dstack(
                (self.weight_dict['w_'], w))
            # append the new bias to the previously-created biases array
            self.weight_dict['b_'] = np.dstack(
                (self.weight_dict['b_'], b))

gw = GetWeights()
Let’s define the callback functions that will allow us to analyse our model:
callbacks = [
    keras.callbacks.ModelCheckpoint('one_file.keras',
                                    save_best_only=True),
    keras.callbacks.TensorBoard(log_dir="./logs", histogram_freq=1),
    gw
]
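Since the TensorBoard callback writes its logs under ./logs, the loss curves and weight histograms can also be inspected by running tensorboard --logdir ./logs during or after training.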
Build and train:
model = build_model()
model.summary()
history = model.fit(training,
                    batch_size=batch_size,
                    steps_per_epoch=int(training_count/batch_size),
                    epochs=10,
                    validation_data=validation,
                    validation_steps=int(validation_count/batch_size),
                    callbacks=callbacks,
                    use_multiprocessing=True,
                    workers=30,
                    )
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 1) 2
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0023s vs `on_train_batch_end` time: 0.0032s). Check your callbacks.
3333/3333 [==============================] - 18s 5ms/step - loss: 0.7364 - mae: 0.6166 - val_loss: 0.0383 - val_mae: 0.1628
Epoch 2/10
3333/3333 [==============================] - 18s 5ms/step - loss: 0.0227 - mae: 0.1213 - val_loss: 0.0119 - val_mae: 0.0871
Epoch 3/10
3333/3333 [==============================] - 18s 5ms/step - loss: 0.0105 - mae: 0.0818 - val_loss: 0.0096 - val_mae: 0.0784
Epoch 4/10
3333/3333 [==============================] - 18s 5ms/step - loss: 0.0100 - mae: 0.0798 - val_loss: 0.0096 - val_mae: 0.0783
Epoch 5/10
3333/3333 [==============================] - 17s 5ms/step - loss: 0.0100 - mae: 0.0797 - val_loss: 0.0099 - val_mae: 0.0793
Epoch 6/10
3333/3333 [==============================] - 18s 5ms/step - loss: 0.0100 - mae: 0.0799 - val_loss: 0.0095 - val_mae: 0.0782
Epoch 7/10
3333/3333 [==============================] - 18s 5ms/step - loss: 0.0100 - mae: 0.0799 - val_loss: 0.0095 - val_mae: 0.0782
Epoch 8/10
3333/3333 [==============================] - 18s 5ms/step - loss: 0.0100 - mae: 0.0797 - val_loss: 0.0096 - val_mae: 0.0787
Epoch 9/10
3333/3333 [==============================] - 18s 5ms/step - loss: 0.0100 - mae: 0.0797 - val_loss: 0.0095 - val_mae: 0.0783
Epoch 10/10
3333/3333 [==============================] - 18s 5ms/step - loss: 0.0100 - mae: 0.0797 - val_loss: 0.0096 - val_mae: 0.0782
Now we load the best saved model and evaluate it on the test set:
##################Checking History############################
model = keras.models.load_model('one_file.keras')
print(f"Test MAE: {model.evaluate(test,batch_size=batch_size,steps=int(test_count/batch_size))[1]:.2f}")
loss = history.history["mae"]
val_loss = history.history["val_mae"]
epochs = range(1, len(loss) + 1)
3333/3333 [==============================] - 5s 1ms/step - loss: 0.0100 - mae: 0.0803
Test MAE: 0.08
first_layer_weights = model.layers[0].get_weights()[0]
first_layer_biases = model.layers[0].get_weights()[1]
print("w = "+str(first_layer_weights[0][0]))
print("b = "+str(first_layer_biases[0]))
print()
print("History of weight: \n"+str(gw.weight_dict['w_']))
print("History of bias: \n"+str(gw.weight_dict['b_']))
w = 3.0074782
b = 0.9947739
History of weight:
[[[2.4153347 2.839952 2.9898615 3.003608 3.007525 3.0061684 3.0074782
2.9995196 2.9962094 3.01626 ]]]
History of bias:
[[[1.2983377 1.0869718 0.99981517 1.0040857 1.0135202 0.9952645
0.9947739 0.99021834 0.99723023 0.9953797 ]]]
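The history collected by GetWeights can also be plotted directly; a short sketch (np.dstack stores the values along the last axis, so we flatten first):
# Plot the evolution of the weight and the bias across epochs.
w_hist = gw.weight_dict['w_'].flatten()
b_hist = gw.weight_dict['b_'].flatten()
plt.plot(range(1, len(w_hist) + 1), w_hist, label="w")
plt.plot(range(1, len(b_hist) + 1), b_hist, label="b")
plt.axhline(3, linestyle="--", color="gray")  # true slope
plt.axhline(1, linestyle="--", color="gray")  # true intercept
plt.xlabel("epoch")
plt.legend()
plt.show()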
A small helper function to gather the data we want to plot:
###################################################################
# Plot Result function
####################################################################
def get_plot_datas(ids_file, data_file):
    ids_file = pd.read_csv(ids_file)
    data_file = pd.read_csv(data_file)
    data_file = data_file[['indf', 'dnbr', 'ndvi']]
    datas = data_file[(data_file['indf'].isin(ids_file.indf))]
    return datas[['ndvi', 'dnbr']].to_numpy()
Let’s have a brief look at the data. N.B.: what we call the mean model is simply $$y_{pred}=\bar y_{obs}$$ In that case the mse is the variance of the observations.
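Indeed, plugging $y_{pred}=\bar y_{obs}$ into the mse gives $$\frac{1}{n}\sum(y_{obs}-\bar y_{obs})^2=s^2_{y},$$ the (biased) sample variance, which is exactly what squaring np.std(y) computes below.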
#################Data summary####################
datas=get_plot_datas(test_ids_file,data_file)
x=datas[:,0]
y=datas[:,1]
print("Observed statistics:")
print()
print("mean dnbr = "+str(np.mean(y)))
print("dnbr variance (mse with mean model) = "+str(np.square(np.std(y))))
print()
print()
y_pred=model.predict(x)
print("mean dnbr predicted = "+str(np.mean(y_pred[:,0])))
print("mae_from_mean_model ="+str(np.sum(np.abs(y-np.mean(y)))/len(y)))
Observed statistics:
mean dnbr = 2.481034895608084
dnbr variance (mse with mean model) = 0.7607728299290712
mean dnbr predicted = 2.4805243
mae_from_mean_model =0.7521578471518485
In the case of the linear model, the mse being $$f(a,b)=\frac{1}{n}\sum(ax_{obs}+b-y_{obs})^2,$$ the gradient coordinates are $$\frac{\partial f}{\partial a}=\frac{2}{n}\sum x_{obs}(ax_{obs}+b-y_{obs})$$ and $$\frac{\partial f}{\partial b}=\frac{2}{n}\sum (ax_{obs}+b-y_{obs}),$$ the sums being taken over the batch data.
The minimum is thus obtained by solving $$\frac{\partial f}{\partial a}=0$$ and $$\frac{\partial f}{\partial b}=0,$$ which leads to $$a=\frac{cov(x,y)}{s^2_{x}}$$ and $$b=\bar{y}-a\bar{x}.$$
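These closed-form values are easy to compute directly with NumPy, which gives an independent check on both np.polyfit and the network (a small sketch reusing the x and y arrays loaded above):
# Closed-form least-squares solution: a = cov(x, y) / var(x), b = y_bar - a * x_bar.
a_cf = np.cov(x, y, bias=True)[0, 1] / np.var(x)
b_cf = np.mean(y) - a_cf * np.mean(x)
print("closed-form a =", a_cf)
print("closed-form b =", b_cf)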
We compare the performance of our NN with that elementary model in the next few lines:
a,b=np.polyfit(x,y,1)
print("Results from the linear model:")
print()
print("a (supposed to be equivalent to w in the last section) = "+str(a))
print("b = "+str(b))
linear_mae=sum(abs(y-(a*x+b)))/len(y)
linear_mse=sum(np.square(y-(a*x+b)))/len(y)
print("linear_model_mae = "+str(linear_mae))
print("linear_model_mse = "+str(linear_mse))
print()
print("NN model results:")
Results from the linear model:
a (supposed to be equivalent to w in the last section) = 3.001565491043912
b = 0.9982055323841632
linear_model_mae = 0.0803045100039715
linear_model_mse = 0.009984939104477841
NN model results:
mean dnbr predicted = 2.4805243
mae_from_mean_model =0.7521578471518485
Ready to plot:
##############################PLOTS#######################################
abscisses=[0,1]
ordonnees=[0,1]
figure,axis = plt.subplots(3,2,figsize = (20, 20))
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
axis[0,0].scatter(x,y,c=z,s=1)
axis[0,0].plot(x,a*x+b,'-',color='black',label="linear model")
axis[0,0].plot(x,first_layer_weights[0][0]*x+first_layer_biases[0],c="red",label="NN model")
axis[0,0].set_title("DNBR vs NDVI: Both models")
axis[0,0].legend()
axis[0,1].plot(epochs, loss, "bo", label="Training MAE")
axis[0,1].plot(epochs, val_loss, "b", label="Validation MAE")
axis[0,1].set_title("Training and validation MAE")
axis[0,1].legend()
xy = np.vstack([y,y_pred[:,0]])
z = gaussian_kde(xy)(xy)
axis[1,0].scatter(y,y_pred[:,0],c=z,s=1)
axis[1,0].plot(abscisses,ordonnees,label="y=x")
axis[1,0].set_title("NN predictions vs observations")
axis[1,0].legend()
res=np.subtract(y_pred[:,0],y)
xy = np.vstack([x,res])
z = gaussian_kde(xy)(xy)
axis[1,1].scatter(x,res,c=z,s=1)
axis[1,1].set_title("Residuals vs NDVI")
axis[2,0].hist(y_pred)
axis[2,0].set_title("Predictions distribution")
axis[2,1].hist(y)
axis[2,1].set_title("dnbr distribution")
plt.show()