# Submission identification fields; both are empty strings in this template.
# NOTE(review): presumably each student fills these in before submitting — confirm.
NAME = ""
STUDENT_ID = ""


# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt

import numpy as np
import seaborn as sns
import pandas as pd


# Handle to the Fashion-MNIST dataset module that ships with Keras.
fashion_mnist = keras.datasets.fashion_mnist
# Split into training and test images with their corresponding labels.
# NOTE(review): load_data() presumably downloads and caches the dataset on
# first use — confirm against the Keras documentation.
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()


# Human-readable class names; the position in the list is the integer label.
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]
# Lookup table from integer label to class name.
class_dict = dict(enumerate(class_names))


def show_image(index):
    """Display training image number ``index`` with its class name as title.

    Reads the module-level ``train_images``, ``train_labels`` and
    ``class_names`` at call time, so it reflects whatever those names
    currently hold.
    """
    figure, axes = plt.subplots()
    # cmap=plt.cm.binary allows us to show the picture in grayscale
    rendered = axes.imshow(train_images[index], cmap=plt.cm.binary)
    axes.set_title(class_names[train_labels[index]])
    figure.colorbar(rendered, ax=axes)  # adds a bar to the side with values
    plt.show()


# Show the first training image before normalization.
show_image(0)


# Question 1.1: scale the pixel intensities so every value lies in [0, 1].
# Per the Keras dataset documentation the raw pixels are 0-255 grayscale
# values, so dividing by 255.0 also converts the arrays to floating point.
train_images = train_images / 255.0
test_images = test_images / 255.0


# Show the same image again; only the colorbar range should change.
show_image(0)


# Let's sample our data to see what kind of images are stored.
# see documentation for subplot here:
# https://matplotlib.org/3.2.1/api/_as_gen/matplotlib.pyplot.subplot.html
plt.figure(figsize=(10, 10))
for i in range(25):
    # One cell of the 5x5 grid per image; subplot positions are 1-based.
    axes = plt.subplot(5, 5, i + 1)
    axes.set_xticks([])
    axes.set_yticks([])
    axes.grid(False)
    axes.imshow(train_images[i], cmap=plt.cm.binary)
    axes.set_xlabel(class_names[train_labels[i]])
plt.show()


print(f'Before reshape, train_images shape: {train_images.shape} test_images shape: {test_images.shape}')
# Flatten every image into one feature row (one column per pixel) while
# keeping the sample axis; the dataframe code below indexes columns 0..783.
train_images = train_images.reshape(train_images.shape[0], -1)
test_images = test_images.reshape(test_images.shape[0], -1)
print(f'After reshape, train_images shape: {train_images.shape} test_images shape: {test_images.shape}')


# Add training data into a dataframe
# One column per flattened pixel, named z0 .. z783.
img_data = {}
for i in range(784):
    img_data[f"z{i}"] = train_images[:, i]
img_data["label"] = train_labels
df_img_train = pd.DataFrame(img_data)
# Translate the integer label into its class name.
train_label_column = df_img_train["label"]
df_img_train["class"] = train_label_column.map(class_dict)
df_img_train.head()


# Add test data into a dataframe
# One column per flattened pixel, named z0 .. z783.
img_data = {}
for i in range(784):
    img_data[f"z{i}"] = test_images[:, i]
img_data["label"] = test_labels
df_img_test = pd.DataFrame(img_data)
# Translate the integer label into its class name.
test_label_column = df_img_test["label"]
df_img_test["class"] = test_label_column.map(class_dict)
df_img_test.head()


from sklearn.manifold import TSNE
from sklearn.decomposition import PCA


from sklearn.preprocessing import StandardScaler


# Standardize the training images with a StandardScaler that is fitted on
# those same images.
standardized_data = StandardScaler().fit_transform(train_images)


# Number of principal components to keep.
n_comps = 50
pca = PCA(n_components=n_comps)
# Fit PCA on the standardized data and project it onto the kept components.
pca_features = pca.fit_transform(standardized_data)


# Add data into a dataframe
# One column per principal component, named z0 .. z{n_comps - 1}.
pca_data = {}
for i in range(n_comps):
    pca_data[f"z{i}"] = pca_features[:, i]
pca_data["label"] = train_labels
df_pca = pd.DataFrame(pca_data)
# Translate the integer label into its class name.
pca_label_column = df_pca["label"]
df_pca["class"] = pca_label_column.map(class_dict)
df_pca.head()


# Report how much variance the kept components explain.
print(f'Compressed dimension of {train_images.shape[1]} to {n_comps} maintaining {pca.explained_variance_ratio_.sum()*100:.2f}% of information.')


# Two-dimensional t-SNE embedding with a fixed random_state.
# NOTE(review): newer scikit-learn releases appear to rename `n_iter` to
# `max_iter` — confirm against the installed version before upgrading.
model = TSNE(n_components=2, random_state=0, perplexity=30, learning_rate=200, n_iter=1000)


print('This may take a few minutes...')
# We are only using the first 10000 data points, this is sufficient for this application.
visualization_data = model.fit_transform(pca_features[:10000])
print('Done.')


# Pair the two embedding coordinates with the labels of the same 10000 samples.
data_to_visualize = dict(
    z1=visualization_data[:, 0],
    z2=visualization_data[:, 1],
    label=train_labels[:10000],
)
df_visualize = pd.DataFrame(data_to_visualize)
# Translate the integer label into its class name.
visualize_label_column = df_visualize["label"]
df_visualize["class"] = visualize_label_column.map(class_dict)
df_visualize.head()


# Scatter of the embedding for every class (regression line disabled).
sns.lmplot(
    data=df_visualize,
    x='z1',
    y='z2',
    hue='class',
    fit_reg=False,
    height=9,
    scatter_kws={"s": 50, "alpha": 0.5},
)


# Same scatter restricted to the "Ankle boot" and "Trouser" classes.
sns.lmplot(
    data=df_visualize[df_visualize["class"].isin(["Ankle boot", "Trouser"])],
    x='z1',
    y='z2',
    hue='class',
    fit_reg=False,
    height=9,
    scatter_kws={"s": 50, "alpha": 0.5},
)


# Same scatter restricted to the "Pullover" and "Coat" classes.
sns.lmplot(
    data=df_visualize[df_visualize["class"].isin(["Pullover", "Coat"])],
    x='z1',
    y='z2',
    hue='class',
    fit_reg=False,
    height=9,
    scatter_kws={"s": 50, "alpha": 0.5},
)


def get_data_subset(df, classes=None, shuffle=True, shuffle_seed=42):
    """Build a two-class dataset from the rows of ``df``.

    Rows whose ``"class"`` value equals ``classes[0]`` get binary label 0 and
    rows equal to ``classes[1]`` get binary label 1; all other rows are
    dropped. Feature columns are those whose name matches ``z[0-9]+``.

    Args:
        df: DataFrame with a ``"class"`` column and ``z<number>`` feature
            columns.
        classes: Sequence with the two class names. Only the first two
            entries are used. ``None`` or an empty sequence yields ``None``.
        shuffle: When true, rows are shuffled; features and labels are
            reordered with the same permutation so pairs stay aligned.
        shuffle_seed: Seed of the private generator used for shuffling. The
            global NumPy random state is left untouched.

    Returns:
        ``None`` when no classes are given, otherwise ``(data, labels)``
        where ``data`` is the 2-D feature array and ``labels`` is an
        ``(n, 1)`` array of 0/1 values.

    Raises:
        IndexError: If ``classes`` holds exactly one entry.
    """
    if classes is None or len(classes) == 0:
        return None

    negative_class, positive_class = classes[0], classes[1]
    class_column = df["class"]
    selected = (class_column == negative_class) | (class_column == positive_class)
    df_filtered = df[selected].copy()
    # Derive the label from the filtered frame itself so the mask always
    # lines up with the rows it is written to.
    df_filtered["binary_label"] = (df_filtered["class"] == positive_class).astype(int)

    data = df_filtered.filter(regex="z[0-9]+").values
    labels = df_filtered["binary_label"].values
    if shuffle:
        # A single permutation applied to both arrays keeps every row paired
        # with its label and avoids reseeding the global generator.
        order = np.random.RandomState(shuffle_seed).permutation(len(labels))
        data, labels = data[order], labels[order]

    return data, labels.reshape(-1, 1)


def train_test_validation_split(X, y, test_size=.20, validation_size=.20):
    """Split ``X`` and ``y`` sequentially into train, test and validation parts.

    The rows are laid out as ``[train | test | validation]`` without any
    shuffling: the test part covers a ``test_size`` fraction of the rows and
    the validation part the final ``validation_size`` fraction.

    Args:
        X: Array whose first axis indexes samples (``X.shape[0]`` is read).
        y: Labels sliced with the same boundaries as ``X``.
        test_size: Fraction of rows assigned to the test part.
        validation_size: Fraction of rows assigned to the validation part.

    Returns:
        ``(X_train, y_train, X_test, y_test, X_val, y_val)``.
    """
    n_samples = X.shape[0]
    trainIdx = int((1. - test_size - validation_size) * n_samples)
    # The test part ends where the validation part starts, i.e.
    # ``validation_size`` from the end; measuring from ``test_size`` instead
    # would swap the two sizes whenever they differ.
    testIdx = int((1. - validation_size) * n_samples)
    X_train, y_train = X[:trainIdx], y[:trainIdx]
    X_test, y_test = X[trainIdx:testIdx], y[trainIdx:testIdx]
    X_val, y_val = X[testIdx:], y[testIdx:]
    return X_train, y_train, X_test, y_test, X_val, y_val

def bias_trick(X):
    """Return ``X`` with a leading column of ones prepended.

    The extra column has one entry per row of ``X`` and is placed before the
    existing columns.
    """
    ones_column = np.ones((X.shape[0], 1))
    return np.concatenate((ones_column, X), axis=1)


# applying sigmoid function (equation 6) to the vector input of z_i's
def sigmoid(z):
    """Apply the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or array-like of real values.

    Returns:
        Float NumPy array with the same shape as ``z`` (0-d for a scalar).
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) cannot overflow; each sign then uses the algebraically
    # equivalent form of the sigmoid that only needs that term.
    exp_neg_abs = np.exp(-np.abs(z))
    sig_z = np.where(z >= 0,
                     1.0 / (1.0 + exp_neg_abs),
                     exp_neg_abs / (1.0 + exp_neg_abs))
    return sig_z

# implementing equation 8
def binary_cross_entropy(y, h):
    """Return the mean binary cross-entropy between labels and probabilities.

    Computes ``-(1/n) * sum(y*log(h) + (1-y)*log(1-h))`` with ``n = len(y)``.
    NOTE(review): assumes equation 8 is this mean form — confirm against the
    assignment text.

    Args:
        y: Array-like of 0/1 labels.
        h: Array-like of predicted probabilities with one entry per label.

    Returns:
        The loss as a float.
    """
    n = len(y)
    # Flatten both so an (n, 1) column and an (n,) vector do not broadcast
    # into an (n, n) matrix.
    y = np.asarray(y, dtype=float).reshape(-1)
    h = np.asarray(h, dtype=float).reshape(-1)
    # Keep probabilities strictly inside (0, 1) so log() never sees 0.
    eps = 1e-12
    h = np.clip(h, eps, 1.0 - eps)
    return float(-np.sum(y * np.log(h) + (1.0 - y) * np.log(1.0 - h)) / n)

# returns the derivative of the BCE function based on equation 9
def binary_cross_entropy_derivative(X, y, theta):
    """Return the gradient of the mean BCE loss with respect to ``theta``.

    Computes ``X.T @ (sigmoid(X @ theta) - y) / n`` with ``n = len(y)``.
    NOTE(review): assumes equation 9 is this averaged gradient — confirm
    against the assignment text.

    Args:
        X: Design matrix with one row per sample.
        y: Labels with the same shape as ``X @ theta``.
        theta: Current weights.

    Returns:
        Array with the same shape as ``theta``.
    """
    z = np.dot(X, theta)
    h = sigmoid(z)
    n = len(y)
    return np.dot(X.T, h - y) / n

# returns the fraction (0.0 to 1.0) of correct predictions
def accuracy(y, h):
    """Return the share of entries where prediction and label agree.

    Args:
        y: Array-like of true labels.
        h: Array-like of predicted labels with one entry per true label.

    Returns:
        Float between 0.0 and 1.0.
    """
    # Flatten both so an (n, 1) column and an (n,) vector compare entry by
    # entry instead of broadcasting.
    y = np.asarray(y).reshape(-1)
    h = np.asarray(h).reshape(-1)
    return float(np.mean(y == h))

# returns predictions for all inputs. Given input is h_i based on equations 7.
def predict(h):
    """Turn probabilities into hard 0/1 class predictions.

    A probability of at least 0.5 is predicted as class 1.
    NOTE(review): the tie at exactly 0.5 is assigned to class 1 — confirm
    this matches the assignment's decision rule.

    Args:
        h: Array-like of probabilities.

    Returns:
        Integer NumPy array of 0/1 values with the same shape as ``h``.
    """
    return (np.asarray(h) >= 0.5).astype(int)
    

def logistic_regression(X, y, learning_rate, num_steps):
    """Fit a logistic-regression classifier with batch gradient descent.

    A bias column is prepended to ``X`` and the data are split into train,
    test and validation parts. Weights start at zero and are updated once per
    step with the gradient of the training loss. Loss and accuracy on the
    train and validation parts are recorded and printed every step.

    Args:
        X: Feature matrix with one row per sample.
        y: Labels as an ``(n, 1)`` column of 0/1 values.
        learning_rate: Step size of each gradient update.
        num_steps: Number of gradient updates.

    Returns:
        ``(theta, train_losses, train_accuracies, val_losses,
        val_accuracies)``; the four lists hold one value per step, measured
        before that step's update.
    """
    # split your data into train and test subsets. Don't forget to apply the bias trick before you split!
    X = bias_trick(X)
    X_train, y_train, X_test, y_test, X_val, y_val = train_test_validation_split(X, y)

    # start with initial parameters theta_i = 0: one weight per column of the
    # bias-augmented matrix, stored as a column so X @ theta matches y's shape
    theta = np.zeros((X_train.shape[1], 1))

    z = np.dot(X_train, theta)
    h = sigmoid(z)
    print(f'Initial Accuracy:{accuracy(y_train, predict(h)):.4f}')
    val_losses = []
    train_losses = []
    val_accuracies = []
    train_accuracies = []

    for step in range(num_steps):
        # Calculate the current output of your logistic network (h_train and h_val)
        z_train = np.dot(X_train, theta)
        h_train = sigmoid(z_train)

        z_val = np.dot(X_val, theta)
        h_val = sigmoid(z_val)

        # Calculate your current training/validation accuracy and BCE loss
        training_accuracy = accuracy(y_train, predict(h_train))
        training_loss = binary_cross_entropy(y_train, h_train)
        validation_accuracy = accuracy(y_val, predict(h_val))
        validation_loss = binary_cross_entropy(y_val, h_val)

        val_losses.append(validation_loss)
        val_accuracies.append(validation_accuracy)
        train_losses.append(training_loss)
        train_accuracies.append(training_accuracy)

        # Calculate the gradient using the derivative of your loss function.
        gradient = binary_cross_entropy_derivative(X_train, y_train, theta)

        # Adjust your weights: step against the gradient to reduce the loss
        theta = theta - learning_rate * gradient

        print(f'Epoch [{step+1}/{num_steps}] '.ljust(20) + f'loss: {training_loss:.4f} - accuracy: {training_accuracy:.4f} - ', end='')
        print(f"val_loss: {validation_loss:.4f} - val_accuracy: {validation_accuracy:.4f}")
    return theta, train_losses, train_accuracies, val_losses, val_accuracies


# Binary dataset: "Trouser" rows get label 0 and "Ankle boot" rows label 1.
X, y = get_data_subset(df_img_train, classes=["Trouser", "Ankle boot"])


# train the model until it converges, use the plotted losses below to verify
learning_rate = 0.05
num_steps = 100
theta, train_losses, train_accuracies, val_losses, val_accuracies = logistic_regression(X, y, learning_rate, num_steps)


import matplotlib.pyplot as plt
# plot your training accuracy and validation accuracy curves together
for accuracy_curve in (train_accuracies, val_accuracies):
    plt.plot(accuracy_curve)
plt.show()

# plot your training losses and validation losses curves together
for loss_curve in (train_losses, val_losses):
    plt.plot(loss_curve)
plt.show()


# Binary dataset: "Pullover" rows get label 0 and "Coat" rows label 1.
X, y = get_data_subset(df_img_train, classes=["Pullover", "Coat"])


# train the model until it converges, use the plotted losses below to verify
# modify learning_rate and num_steps to accomplish this
learning_rate = 0.01
num_steps = 100
theta, train_losses, train_accuracies, val_losses, val_accuracies = logistic_regression(X, y, learning_rate, num_steps)


import matplotlib.pyplot as plt
# plot your training accuracy and validation accuracy curves together
for accuracy_curve in (train_accuracies, val_accuracies):
    plt.plot(accuracy_curve)
plt.show()

# plot your training losses and validation losses curves together
for loss_curve in (train_losses, val_losses):
    plt.plot(loss_curve)
plt.show()


from tensorflow.keras.layers import Input, Dense # only use these layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import * # you can use any optimizer

# Define an input layer with the correct shape for your data:
# each sample is a flat vector of the 784 z0..z783 pixel features.
input_layer = Input(shape=(784,))


# Pass the input layer's output to a dense layer of size 100; ReLU is used as
# the hidden activation.
x = Dense(100, activation='relu')(input_layer)

# Pass the previous hidden layer's output to a dense layer of size 1 for
# classification; sigmoid maps the output to a probability in (0, 1), which
# is what binary cross-entropy expects.
output = Dense(1, activation='sigmoid')(x)

# Define a model with its input as your input layer and output as your output layer
model = Model(inputs=input_layer, outputs=output)


# Show a summary of your model
model.summary()


# Compile your model with your chosen optimizer, binary cross entropy for the loss, and accuracy as the metric

# The metric is named 'accuracy' so the training history exposes the
# 'accuracy' and 'val_accuracy' keys that plot_accuracies reads.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


# Binary dataset: "Pullover" rows get label 0 and "Coat" rows label 1.
X, y = get_data_subset(df_img_train, classes=["Pullover", "Coat"])

# Call fit on your model passing in the X, y data above with validation split of 0.2 and train for 100 epochs
hist = model.fit(X, y, validation_split=0.2, epochs=100)


def plot_losses(hist):
    """Plot the training and validation loss curves stored in ``hist``.

    ``hist.history`` must provide the ``'loss'`` and ``'val_loss'`` series.
    """
    recorded = hist.history
    for series_name in ('loss', 'val_loss'):
        plt.plot(recorded[series_name])
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(['Train', 'Val'])
    plt.show()
def plot_accuracies(hist):
    """Plot the training and validation accuracy curves stored in ``hist``.

    ``hist.history`` must provide the ``'accuracy'`` and ``'val_accuracy'``
    series.
    """
    recorded = hist.history
    for series_name in ('accuracy', 'val_accuracy'):
        plt.plot(recorded[series_name])
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(['Train', 'Val'])
    plt.show()


# plot your losses and accuracies
# Both helpers read the per-epoch series from hist.history.
plot_losses(hist)
plot_accuracies(hist)

# Module 2: Logistic Regression and Neural Network

# Data Exploration and Preprocessing

# Load the Fashion-MNIST dataset

# Question 1: Data Preprocessing

# Question 1.1) Normalizing the data

# Question 1.2) Data visualization

# Projecting the classes in 2D for visualization

# Question 2: Linearly Separable

# YOUR ANSWER HERE

# Question 3: Not Linearly Separable

# YOUR ANSWER HERE

# Question 4: Logistic Regression

# Question 4.1) Observation

# YOUR ANSWER HERE

# Question 4.2) Observation

# YOUR ANSWER HERE

# Question 4.3) Comparison

# YOUR ANSWER HERE

# Question 5: Neural Networks

# Question 5.1) Observation

# YOUR ANSWER HERE