import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style("darkgrid")
df = pd.read_csv("../data/Titanic.csv")
df = df.drop(["PassengerId", "Name", "Ticket", "Cabin"], axis=1)
# fill missing values
df = df.fillna({"Age": df.Age.median(), "Embarked": df.Embarked.mode()[0]})
# one-hot-encoding
df = pd.get_dummies(df, columns=["Pclass", "Sex", "SibSp", "Parch", "Embarked"])
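# Note: recent pandas versions return boolean dummy columns; if you prefer
# explicit 0/1 integers, pd.get_dummies also accepts a dtype argument,
# e.g. pd.get_dummies(df, columns=[...], dtype=int).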
# min-max-scaling
scaler = MinMaxScaler()
df[["Age", "Fare"]] = scaler.fit_transform(df[["Age", "Fare"]])
df.head()
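# Quick sanity check on the preprocessing above (assuming the standard Kaggle
# Titanic training file with 891 rows): no missing values should remain, and
# one-hot encoding should leave 24 feature columns plus the Survived target.
print(df.isnull().sum().sum())   # expected: 0
print(df.shape)                  # expected: (891, 25)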
x = df.drop("Survived", axis=1).values
y = df.Survived.values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
y_train = y_train.reshape((-1,1))
y_test = y_test.reshape((-1,1))
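# With 891 rows and test_size=0.2, the split works out to 712 training rows and
# 179 test rows, which is where the (712, ...) shapes in the comments below come from.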
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
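# Note for the backpropagation step: the sigmoid's derivative can be written in
# terms of its output, sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).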
def mean_squared_error(predictions, labels):
    # Half-MSE: the extra factor of 1/2 cancels when the loss is differentiated,
    # keeping the backpropagation formulas free of a stray factor of 2.
    N = labels.size
    mse = ((predictions - labels)**2).sum() / (2*N)
    return mse
def accuracy(predictions, labels):
    # Round the sigmoid outputs to 0/1 and compare against the true labels.
    predictions_correct = predictions.round() == labels
    return predictions_correct.mean()
learning_rate = 0.1
epochs = 10000
n_input = x_train.shape[1]   # 24 features after one-hot encoding
n_hidden = 4
n_output = 1
np.random.seed(10)
weights_1 = np.random.normal(scale=0.5, size=(n_input, n_hidden))   # (24, 4)
weights_2 = np.random.normal(scale=0.5, size=(n_hidden, n_output))  # (4, 1)
N = y_train.size
monitoring = {"mean_squared_error": [], "accuracy": []}
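# Architecture note: this minimal network has no bias terms; each layer is just
# a weight matrix followed by a sigmoid. The deltas in the loop follow from the
# chain rule for the half-MSE loss, using the sigmoid derivative identity above:
#   output_layer_delta = (output - y) * output * (1 - output)
#   hidden_layer_delta = (output_layer_delta @ weights_2.T) * hidden * (1 - hidden)
# Each weight gradient is the layer's input (transposed) dotted with its delta,
# averaged over the N training examples.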
for epoch in range(epochs):
    # feedforward
    hidden_layer = sigmoid(np.dot(x_train, weights_1))        # (712, 4)
    output_layer = sigmoid(np.dot(hidden_layer, weights_2))   # (712, 1)
    
    # monitor training process
    acc = accuracy(output_layer, y_train)
    mse = mean_squared_error(output_layer, y_train)
    monitoring["accuracy"].append(acc)
    monitoring["mean_squared_error"].append(mse)
 
    # backpropagation
    output_layer_delta = (output_layer - y_train) * output_layer * (1 - output_layer)                 # (712, 1)
    hidden_layer_delta = np.dot(output_layer_delta, weights_2.T) * hidden_layer * (1 - hidden_layer)  # (712, 4)
    # weight updates
    weights_2 -= learning_rate * np.dot(hidden_layer.T, output_layer_delta) / N   # (4, 1)
    weights_1 -= learning_rate * np.dot(x_train.T, hidden_layer_delta) / N        # (24, 4)
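    # Optional progress printout (not in the original notebook): report the
    # training metrics every 1000 epochs.
    if epoch % 1000 == 0:
        print("epoch {}: mse={:.4f}, accuracy={:.4f}".format(epoch, mse, acc))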
monitoring_df = pd.DataFrame(monitoring)
fig, axes = plt.subplots(1, 2, figsize=(15,5))
monitoring_df.mean_squared_error.plot(ax=axes[0], title="Mean Squared Error")
monitoring_df.accuracy.plot(ax=axes[1], title="Accuracy");
# feedforward on the held-out test set with the trained weights
hidden_layer_inputs = np.dot(x_test, weights_1)
hidden_layer_outputs = sigmoid(hidden_layer_inputs)
output_layer_inputs = np.dot(hidden_layer_outputs, weights_2)
output_layer_outputs = sigmoid(output_layer_inputs)
acc = accuracy(output_layer_outputs, y_test)
print("Accuracy: {}".format(acc))