import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style("darkgrid")
df = pd.read_csv("../data/Titanic.csv")
df = df.drop(["PassengerId", "Name", "Ticket", "Cabin"], axis=1)
# fill missing values
df = df.fillna({"Age": df.Age.median(), "Embarked": df.Embarked.mode()[0]})
# one-hot-encoding
df = pd.get_dummies(df, columns=["Pclass", "Sex", "SibSp", "Parch", "Embarked"])
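# Note: recent pandas versions return boolean dummy columns; if you prefer
# explicit 0/1 integers, pd.get_dummies also accepts a dtype argument,
# e.g. pd.get_dummies(df, columns=[...], dtype=int).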
# min-max-scaling
scaler = MinMaxScaler()
df[["Age", "Fare"]] = scaler.fit_transform(df[["Age", "Fare"]])
df.head()
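# Quick sanity check on the preprocessing above (assuming the standard Kaggle
# Titanic training file with 891 rows): no missing values should remain, and
# one-hot encoding should leave 24 feature columns plus the Survived target.
print(df.isnull().sum().sum())   # expected: 0
print(df.shape)                  # expected: (891, 25)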
x = df.drop("Survived", axis=1).values
y = df.Survived.values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
y_train = y_train.reshape((-1,1))
y_test = y_test.reshape((-1,1))
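# With 891 rows and test_size=0.2, the split works out to 712 training rows and
# 179 test rows, which is where the (712, ...) shapes in the comments below come from.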
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
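# Note for the backpropagation step: the sigmoid's derivative can be written in
# terms of its output, sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).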
def mean_squared_error(predictions, labels):
    # Half-MSE: the extra factor of 1/2 cancels when the loss is differentiated,
    # keeping the backpropagation formulas free of a stray factor of 2.
    N = labels.size
    mse = ((predictions - labels)**2).sum() / (2*N)
    return mse
def accuracy(predictions, labels):
    # Round the sigmoid outputs to 0/1 and compare against the true labels.
    predictions_correct = predictions.round() == labels
    return predictions_correct.mean()
learning_rate = 0.1
epochs = 10000
n_input = x_train.shape[1]   # 24 features after one-hot encoding
n_hidden = 4
n_output = 1
np.random.seed(10)
weights_1 = np.random.normal(scale=0.5, size=(n_input, n_hidden))   # (24, 4)
weights_2 = np.random.normal(scale=0.5, size=(n_hidden, n_output))  # (4, 1)
N = y_train.size
monitoring = {"mean_squared_error": [], "accuracy": []}
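# Architecture note: this minimal network has no bias terms; each layer is just
# a weight matrix followed by a sigmoid. The deltas in the loop follow from the
# chain rule for the half-MSE loss, using the sigmoid derivative identity above:
#   output_layer_delta = (output - y) * output * (1 - output)
#   hidden_layer_delta = (output_layer_delta @ weights_2.T) * hidden * (1 - hidden)
# Each weight gradient is the layer's input (transposed) dotted with its delta,
# averaged over the N training examples.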
for epoch in range(epochs):
    # feedforward
    hidden_layer = sigmoid(np.dot(x_train, weights_1))        # (712, 4)
    output_layer = sigmoid(np.dot(hidden_layer, weights_2))   # (712, 1)
    
    # monitor training process
    acc = accuracy(output_layer, y_train)
    mse = mean_squared_error(output_layer, y_train)
    monitoring["accuracy"].append(acc)
    monitoring["mean_squared_error"].append(mse)
 
    # backpropagation
    output_layer_delta = (output_layer - y_train) * output_layer * (1 - output_layer)                 # (712, 1)
    hidden_layer_delta = np.dot(output_layer_delta, weights_2.T) * hidden_layer * (1 - hidden_layer)  # (712, 4)
    # weight updates
    weights_2 -= learning_rate * np.dot(hidden_layer.T, output_layer_delta) / N   # (4, 1)
    weights_1 -= learning_rate * np.dot(x_train.T, hidden_layer_delta) / N        # (24, 4)
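    # Optional progress printout (not in the original notebook): report the
    # training metrics every 1000 epochs.
    if epoch % 1000 == 0:
        print("epoch {}: mse={:.4f}, accuracy={:.4f}".format(epoch, mse, acc))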
monitoring_df = pd.DataFrame(monitoring)
fig, axes = plt.subplots(1, 2, figsize=(15,5))
monitoring_df.mean_squared_error.plot(ax=axes[0], title="Mean Squared Error")
monitoring_df.accuracy.plot(ax=axes[1], title="Accuracy");
# feedforward on the held-out test set with the trained weights
hidden_layer_inputs = np.dot(x_test, weights_1)
hidden_layer_outputs = sigmoid(hidden_layer_inputs)
output_layer_inputs = np.dot(hidden_layer_outputs, weights_2)
output_layer_outputs = sigmoid(output_layer_inputs)
acc = accuracy(output_layer_outputs, y_test)
print("Accuracy: {}".format(acc))