| Filename |
| --- |
| code/first_network.py |
| code/mnist_loader.py |
| code/plotting_routines.py |
| code/second_network.py |
diff --git a/code/first_network.py b/code/first_network.py
new file mode 100644
index 0000000..9a06572
--- /dev/null
+++ b/code/first_network.py
@@ -0,0 +1,188 @@
+#
+# Introduction to Neural Networks.
+# Given at SciNet, May 30 2017, by Erik Spence.
+#
+# This file, first_network.py, contains the implementation of our
+# first neural network.
+#
+
+#######################################################################
+
+
+"""
+first_network contains the implementation of our single-node neural
+network.  Use 'build_model' to train the network.
+"""
+
+
+#######################################################################
+
+
+import numpy as np
+import numpy.random as npr
+
+
+#######################################################################
+
+
+def sigma(x, model):
+
+    """
+    Returns the sigmoid function evaluated at z, where z is the
+    weighted sum of the components of x, plus the bias.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, 2), containing the
+      2D position of the data points.  num_points is the number of
+      data points.
+
+    - model: dictionary containing the model parameters.  These model
+      parameters should include:
+
+      - 'w1': float, weight which multiplies the x dimension of the
+        data.
+
+      - 'w2': float, weight which multiplies the y dimension of the
+        data.
+
+      - 'b': float, bias for the network.
+
+    Outputs:
+
+    - vector of floats of length num_points.
+
+    """
+
+    # Calculate z.
+    z = model['w1'] * x[:, 0] + model['w2'] * x[:, 1] + model['b']
+
+    # Return the result.
+    return 1. / (1. + np.exp(-z))
+
+
+#######################################################################
+
+
+# The prediction function.  This function runs the data, in the
+# forward direction, through the neural network.  Though it is
+# obviously redundant for our first neural network, it is included
+# for consistency with the second neural network example.
+def predict(x, model):
+
+    """
+    The predict function runs the data through a forward pass of the
+    neural network, and returns the output.  For our first network
+    this simply means invoking the sigmoid function on the input
+    data.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, 2), containing the
+      2D position of the data points.  num_points is the number of
+      data points.
+
+    - model: dictionary containing the model parameters.
+
+    Outputs:
+
+    - vector of floats of length num_points.
+
+    """
+
+    # Return the sigma function.
+    return sigma(x, model)
+
+
+#######################################################################
+
+
+# Chomsky, Heisenberg and Goedel walk into a bar.
+#
+# Heisenberg says: "I can tell we're in a joke, but I can't tell if
+# it's funny."
+#
+# Goedel says: "We can't tell if it's funny because we're inside the
+# joke."
+#
+# Chomsky says: "The joke's funny, you're just not telling it right."
+
+
+#######################################################################
+
+
+def build_model(x, v, eta = 0.01, num_steps = 10000,
+                print_best = True):
+
+    """
+    This function uses gradient descent to update the neural
+    network's model parameters, minimizing the quadratic cost
+    function.  It returns the best model.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, 2), containing the
+      2D position of the data points.  num_points is the number of
+      data points.
+
+    - v: integer vector of length num_points, containing the correct
+      values (0 or 1) for the data.
+
+    - eta: float, the stepsize parameter for the gradient descent.
+
+    - num_steps: integer, number of steps to iterate through the
+      training data for gradient descent.
+
+    - print_best: boolean, if True, print the model accuracy every
+      1000 iterations.
+
+    Outputs:
+
+    - dictionary containing the parameters of the best model.
+
+    """
+
+    # Initialize the parameters to random values.  We need to learn
+    # these.
+    model = {'w1': npr.random(), 'w2': npr.random(),
+             'b': npr.random()}
+
+    # A scaling factor used in determining the best model.
+    scale = 100. / float(len(v))
+
+    # Initialize the score of our best model.
+    best = 0.0
+
+    # Forward propagation, to initialize f.
+    f = sigma(x, model)
+
+    # Gradient descent.
+    for i in range(0, num_steps):
+
+        # Calculate the derivatives.
+        temp = (f - v) * f * (1 - f)
+        dCdw1 = np.sum(temp * x[:, 0])
+        dCdw2 = np.sum(temp * x[:, 1])
+        dCdb = np.sum(temp)
+
+        # Update the parameters.
+        model['w1'] -= eta * dCdw1
+        model['w2'] -= eta * dCdw2
+        model['b'] -= eta * dCdb
+
+        # Check to see if this is our best model yet.
+        f = sigma(x, model)
+        score = np.sum(np.round(f) == v) * scale
+
+        # Keep the best model.  The parameters here are floats, so a
+        # shallow copy of the dictionary is safe.
+        if (score > best):
+            best, bestmodel = score, model.copy()
+
+        # Optionally print the score.
+        if (print_best) and (i % 1000 == 0):
+            print("Best by step %i: %.1f %%" % (i, best))
+
+    print("Our best model gets %.1f percent correct!" % best)
+
+    # Return the best parameters.
+    return bestmodel
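To make the API concrete, here is a minimal sketch of how first_network might be driven. The two-Gaussian-blob toy data, the random seed, and the hyperparameter values are illustrative assumptions, not part of the committed files:

```python
import numpy as np
import numpy.random as npr

from first_network import build_model, predict
from plotting_routines import plot_decision_boundary

# Hypothetical toy data: two Gaussian blobs, one per class.
npr.seed(42)
x = np.vstack([npr.randn(100, 2) + [2., 2.],
               npr.randn(100, 2) - [2., 2.]])
v = np.concatenate([np.ones(100, dtype = int),
                    np.zeros(100, dtype = int)])

# Train the single-node network, then plot the (linear) decision
# boundary it has learned.
model = build_model(x, v, eta = 0.01, num_steps = 10000)
plot_decision_boundary(x, v, model, predict)
```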
diff --git a/code/mnist_loader.py b/code/mnist_loader.py
new file mode 100644
index 0000000..59d3e71
--- /dev/null
+++ b/code/mnist_loader.py
@@ -0,0 +1,127 @@
+#
+# Introduction to Neural Networks.
+# Given at SciNet, May 30 2017, by Erik Spence.
+#
+# This file, mnist_loader.py, contains the code needed to load the
+# MNIST dataset.  The code borrows heavily from
+# http://neuralnetworksanddeeplearning.com.
+#
+
+#######################################################################
+
+
+"""
+mnist_loader contains the code needed to load the MNIST dataset,
+both 1D and 2D versions.  The code has been heavily borrowed from
+http://neuralnetworksanddeeplearning.com.
+
+"""
+
+
+#######################################################################
+
+
+import gzip
+import pickle
+
+import numpy as np
+
+
+#######################################################################
+
+
+def load_mnist_1D(filename = '../data/mnist.pkl.gz'):
+
+    """
+    Returns the MNIST data as six arrays: the training, validation
+    and test data, each as an array of images and a vector of
+    labels.
+
+    Inputs:
+
+    - filename: string, name of the file containing the data.
+
+    Outputs:
+
+    - tuple of six arrays, containing the training, validation and
+      test data.  These take the form:
+
+      - training images: 2D array of floats of shape (50000, 784),
+        containing the pixel values for each image.
+
+      - training labels: integer vector of length 50000, containing
+        the value of the number in each image.
+
+      - validation images and labels: same as the training data,
+        except of length 10000.
+
+      - test images and labels: same as the training data, except of
+        length 10000.
+
+    """
+
+    # Open the file.
+    f = gzip.open(filename, 'rb')
+
+    # Load the data.  The file was pickled under Python 2, so the
+    # latin1 encoding is needed to unpickle it under Python 3.
+    training_data, validation_data, test_data = \
+        pickle.load(f, encoding = 'latin1')
+
+    # Close the file.
+    f.close()
+
+    # Return the values.
+    return training_data[0], training_data[1], \
+        validation_data[0], validation_data[1], \
+        test_data[0], test_data[1]
+
+
+#######################################################################
+
+
+def load_mnist_2D(filename = '../data/mnist.pkl.gz'):
+
+    """
+    Returns the MNIST data as six arrays: the training, validation
+    and test data, each as an array of images and a vector of
+    labels.  The images are reshaped into 2D form.
+
+    Inputs:
+
+    - filename: string, name of the file containing the data.
+
+    Outputs:
+
+    - tuple of six arrays, containing the training, validation and
+      test data.  These take the form:
+
+      - training images: 4D array of floats of shape
+        (50000, 28, 28, 1), containing the pixel values for each
+        image.
+
+      - training labels: integer vector of length 50000, containing
+        the value of the number in each image.
+
+      - validation images and labels: same as the training data,
+        except of length 10000.
+
+      - test images and labels: same as the training data, except of
+        length 10000.
+
+    """
+
+    # Get the data.
+    tr_d, tr_v, va_d, va_v, te_d, te_v = load_mnist_1D(filename = filename)
+
+    # Reshape the flat images into 28 x 28 x 1 arrays.
+    training_inputs = np.array([x.reshape(28, 28, 1) for x in tr_d])
+    validation_inputs = np.array([x.reshape(28, 28, 1) for x in va_d])
+    test_inputs = np.array([x.reshape(28, 28, 1) for x in te_d])
+
+    # Return the data.
+    return training_inputs, tr_v, validation_inputs, va_v, \
+        test_inputs, te_v
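A quick sketch of the loaders in use, assuming the pickled dataset sits at the default path ../data/mnist.pkl.gz:

```python
from mnist_loader import load_mnist_1D, load_mnist_2D

# Flat 784-pixel images, plus labels, for the three splits.
tr_d, tr_v, va_d, va_v, te_d, te_v = load_mnist_1D()
print(tr_d.shape)   # (50000, 784)
print(tr_v[:10])    # the first ten training labels

# The same images, reshaped for use with 2D (convolutional) networks.
tr_i, _, va_i, _, te_i, _ = load_mnist_2D()
print(tr_i.shape)   # (50000, 28, 28, 1)
```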
diff --git a/code/plotting_routines.py b/code/plotting_routines.py
new file mode 100644
index 0000000..a9b4ad3
--- /dev/null
+++ b/code/plotting_routines.py
@@ -0,0 +1,137 @@
+#
+# Introduction to Neural Networks.
+# Given at SciNet, May 30 2017, by Erik Spence.
+#
+# This file, plotting_routines.py, contains some simple plotting
+# routines.
+#
+
+#######################################################################
+
+
+"""
+plotting_routines.py contains two routines for plotting the class'
+data.
+
+"""
+
+
+#######################################################################
+
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+#######################################################################
+
+
+def plot_dots(x, v, **kwargs):
+
+    """
+    This function will generate a scatter plot of the data, with the
+    colour of the dots indicating the category of the data point.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, 2), containing the
+      2D position of the data points.  num_points is the number of
+      data points.
+
+    - v: integer vector of length num_points, containing the correct
+      values (0 or 1) for the data.
+
+    - kwargs: optionally, the axis limits may be overridden by
+      passing x_min, x_max, y_min and y_max.
+
+    Outputs: nothing returned.
+
+    """
+
+    # Get the number of data points.
+    num_points = len(x[:, 0])
+
+    # Set min and max values and give them some padding.  (This
+    # assumes the data are centred near the origin, so that scaling
+    # by 1.1 pads the limits outward.)
+    if 'x_min' in kwargs: x_min = kwargs['x_min']
+    else: x_min = x[:, 0].min() * 1.1
+
+    if 'x_max' in kwargs: x_max = kwargs['x_max']
+    else: x_max = x[:, 0].max() * 1.1
+
+    if 'y_min' in kwargs: y_min = kwargs['y_min']
+    else: y_min = x[:, 1].min() * 1.1
+
+    if 'y_max' in kwargs: y_max = kwargs['y_max']
+    else: y_max = x[:, 1].max() * 1.1
+
+    # Set the colours based on the v values.
+    cy = np.array(['Orange'] * num_points)
+    cy[v == 1] = 'Blue'
+
+    # Plot the points, and tweak the axes.
+    plt.scatter(x[:, 0], x[:, 1], c = cy, s = 50)
+    plt.xlim(x_min, x_max)
+    plt.ylim(y_min, y_max)
+    plt.show()
+
+
+#######################################################################
+
+
+def plot_decision_boundary(x, v, model, predict_function, **kwargs):
+
+    """
+    This function generates a plot of the model's decision boundary,
+    and then scatter plots the data on top of it.
+
+    This function is heavily based on something I found on the web.
+    Unfortunately, I can't remember where I found it.  Thanks to the
+    author.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, 2), containing the
+      2D position of the data points.  num_points is the number of
+      data points.
+
+    - v: integer vector of length num_points, containing the correct
+      values (0 or 1) for the data.
+
+    - model: dictionary containing the model parameters.
+
+    - predict_function: the function used to run the forward pass of
+      the model.
+
+    - kwargs: optionally, the axis limits may be overridden by
+      passing x_min, x_max, y_min and y_max.
+
+    Outputs: nothing returned.
+
+    """
+
+    # Set min and max values and give them some padding.
+    if 'x_min' in kwargs: x_min = kwargs['x_min']
+    else: x_min = x[:, 0].min() * 1.1
+
+    if 'x_max' in kwargs: x_max = kwargs['x_max']
+    else: x_max = x[:, 0].max() * 1.1
+
+    if 'y_min' in kwargs: y_min = kwargs['y_min']
+    else: y_min = x[:, 1].min() * 1.1
+
+    if 'y_max' in kwargs: y_max = kwargs['y_max']
+    else: y_max = x[:, 1].max() * 1.1
+
+    # The distance between grid points.
+    h = 0.01
+
+    # Generate a grid of points with distance h between them.
+    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
+                         np.arange(y_min, y_max, h))
+
+    # Join the x and y positions.
+    c = np.c_[xx.ravel(), yy.ravel()]
+
+    # Calculate the model values for the whole grid.  Round to the
+    # nearest integer.
+    yp = np.round(predict_function(c, model))
+    yp = yp.reshape(xx.shape)
+
+    # Plot the model contour, with the training data on top of it.
+    # plot_dots calls plt.show(), which displays the combined figure,
+    # so no second call to plt.show() is needed here.  Pass the axis
+    # limits through, so that any overrides also apply to the dots.
+    plt.contourf(xx, yy, yp, cmap = plt.cm.Spectral)
+    plot_dots(x, v, x_min = x_min, x_max = x_max,
+              y_min = y_min, y_max = y_max)
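And a usage sketch for plot_dots on its own, again with hypothetical blob data, this time overriding the automatic axis limits:

```python
import numpy as np
import numpy.random as npr

from plotting_routines import plot_dots

# Hypothetical toy data: two Gaussian blobs, one per class.
npr.seed(0)
x = np.vstack([npr.randn(50, 2) + [1.5, 1.5],
               npr.randn(50, 2) - [1.5, 1.5]])
v = np.concatenate([np.ones(50, dtype = int),
                    np.zeros(50, dtype = int)])

# Scatter the points, with explicit axis limits.
plot_dots(x, v, x_min = -5., x_max = 5., y_min = -5., y_max = 5.)
```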
diff --git a/code/second_network.py b/code/second_network.py
new file mode 100644
index 0000000..f685537
--- /dev/null
+++ b/code/second_network.py
@@ -0,0 +1,276 @@
+#
+# Introduction to Neural Networks.
+# Given at SciNet, May 30 2017, by Erik Spence.
+#
+# This file, second_network.py, contains the implementation of our
+# second neural network.
+#
+
+#######################################################################
+
+
+"""
+second_network contains the implementation of our
+single-hidden-layer neural network.  Use 'build_model' to train the
+network.
+
+"""
+
+
+#######################################################################
+
+
+import numpy as np
+import numpy.random as npr
+
+
+#######################################################################
+
+
+def sigma(z):
+
+    """
+    Returns the sigmoid function evaluated at z.
+
+    Inputs:
+
+    - z: vector of floats.
+
+    Outputs:
+
+    - vector of floats, the same length as z.
+
+    """
+
+    # Return the result.
+    return 1. / (1. + np.exp(-z))
+
+
+#######################################################################
+
+
+def sigmaprime(z):
+
+    """
+    Returns the derivative of the sigmoid function, evaluated at z.
+
+    Inputs:
+
+    - z: vector of floats.
+
+    Outputs:
+
+    - vector of floats, the same length as z.
+
+    """
+
+    # Return the result.
+    return sigma(z) * (1.0 - sigma(z))
+
+
+#######################################################################
+
+
+def forward(x, model):
+
+    """
+    This function runs a forward pass of the data through the neural
+    network, and returns the values which were calculated along the
+    way.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, input_dim),
+      containing the data to be input to the network.  num_points is
+      the number of data points.  input_dim is the dimension of the
+      input data.
+
+    - model: dictionary containing the model parameters.  These model
+      parameters should include:
+
+      - 'w1': 2D array of floats of shape (num_nodes, input_dim).
+        These are the weights for the hidden layer.
+
+      - 'b1': 2D array of floats of shape (num_nodes, 1).  These are
+        the biases for the hidden layer.  The superfluous extra
+        dimension is needed so that the biases can be seamlessly
+        added to the weights-data product.
+
+      - 'w2': 2D array of floats of shape (output_dim, num_nodes).
+        These are the weights for the output layer.
+
+      - 'b2': 2D array of floats of shape (output_dim, 1).  These are
+        the biases for the output layer.
+
+    Outputs:
+
+    - z1, z2, a1, a2, as a tuple.  These are:
+
+      - z1: 2D array of floats of shape (num_nodes, num_points),
+        containing the value of the variable z to be input to the
+        hidden layer.  num_nodes is the number of nodes in the
+        hidden layer.
+
+      - z2: 2D array of floats of shape (output_dim, num_points),
+        containing the value of the variable z to be input to the
+        output layer.  output_dim is the output dimension of the
+        network.
+
+      - a1: 2D array of floats of shape (num_nodes, num_points),
+        containing the output of the hidden layer.
+
+      - a2: 2D array of floats of shape (output_dim, num_points),
+        containing the output of the output layer.
+
+    """
+
+    # Forward propagation through the network.
+    # First the hidden layer.
+    z1 = model['w1'].dot(x.T) + model['b1']
+    a1 = sigma(z1)
+
+    # Then the output layer.
+    z2 = model['w2'].dot(a1) + model['b2']
+    a2 = sigma(z2)
+
+    return z1, z2, a1, a2
+
+
+#######################################################################
+
+
+def predict(x, model):
+
+    """
+    The predict function runs the data through a forward pass of the
+    neural network, and returns the output.  For our second network
+    this means calculating the variable a2, and taking the index of
+    the maximum output value for each data point.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, input_dim),
+      containing the data to be input to the network.
+
+    - model: dictionary containing the model parameters.
+
+    Outputs:
+
+    - integer vector of length num_points.
+
+    """
+
+    # Run the data through the network, but we're only interested in
+    # the output.
+    _, _, _, a2 = forward(x, model)
+
+    # Get the index of the maximum value for each data point, and
+    # return it.
+    return np.argmax(a2, axis = 0)
+
+
+#######################################################################
+
+
+# The first rule of Thesaurus Club is: do not discuss, confer about,
+# descant, confabulate, converse about or mention Thesaurus Club.
+
+
+#######################################################################
+
+
+def build_model(num_nodes, x, v, eta, output_dim, num_steps = 10000,
+                print_best = True, lam = 0.0):
+
+    """
+    This function uses gradient descent to update the neural
+    network's model parameters, minimizing the quadratic cost
+    function.  It returns the best model.
+
+    Inputs:
+
+    - num_nodes: integer, number of nodes in the hidden layer.
+
+    - x: 2D array of floats of shape (num_points, input_dim),
+      containing the input data.
+
+    - v: integer vector of length num_points, containing the correct
+      values (0 to output_dim - 1) for the data.
+
+    - eta: float, the stepsize parameter for the gradient descent.
+
+    - output_dim: integer, number of nodes in the output layer.
+
+    - num_steps: integer, number of steps to iterate through the
+      training data for gradient descent.
+
+    - print_best: boolean, if True, print the model accuracy every
+      1000 iterations.
+
+    - lam: float, regularization parameter.
+
+    Outputs:
+
+    - dictionary containing the parameters of the best model.
+
+    """
+
+    # Get the input dimension of the data.
+    input_dim = np.shape(x)[1]
+
+    # Initialize the parameters to random values.  We need to learn
+    # these.
+    model = {'w1': npr.randn(num_nodes, input_dim),
+             'b1': np.zeros([num_nodes, 1]),
+             'w2': npr.randn(output_dim, num_nodes),
+             'b2': np.zeros([output_dim, 1])}
+
+    # A scaling factor used in determining the best model.
+    scale = 100. / float(len(v))
+
+    # Initialize the score of our best model.
+    best = 0.0
+
+    # Forward propagation.
+    z1, _, a1, a2 = forward(x, model)
+
+    # Gradient descent.
+    for i in range(0, num_steps):
+
+        # Backpropagation.  Copy a2, so that we don't modify it in
+        # place.
+        delta2 = a2.copy()
+
+        # Here we subtract v, which is just 1, but only at the entry
+        # for the correct class of each data point.  This is the
+        # error in the final output (how wrong is it?).  (We should
+        # similarly subtract 0 from the other entries, but of course
+        # this would not do anything.)
+        delta2[v, np.arange(len(v))] -= 1
+        delta1 = (model['w2'].T).dot(delta2) * sigmaprime(z1)
+
+        # Calculate the derivatives.
+        dCdb2 = np.sum(delta2, axis = 1, keepdims = True)
+        dCdb1 = np.sum(delta1, axis = 1, keepdims = True)
+
+        dCdw2 = delta2.dot(a1.T)
+        dCdw1 = delta1.dot(x)
+
+        # Gradient descent parameter update, with regularization.
+        model['w1'] -= eta * (lam * model['w1'] + dCdw1)
+        model['b1'] -= eta * dCdb1
+        model['w2'] -= eta * (lam * model['w2'] + dCdw2)
+        model['b2'] -= eta * dCdb2
+
+        # Check to see if this is our best model yet.
+        z1, _, a1, a2 = forward(x, model)
+        score = np.sum(np.argmax(a2, axis = 0) == v) * scale
+
+        # Keep the best model.  The parameter arrays must be copied
+        # explicitly: a shallow dictionary copy would share them with
+        # the model, which is updated in place by the -= operations
+        # above.
+        if (score > best):
+            best = score
+            bestmodel = {key: value.copy()
+                         for key, value in model.items()}
+
+        # Optionally print the score.
+        if (print_best) and (i % 1000 == 0):
+            print("Best by step %i: %.1f %%" % (i, best))
+
+    print("Our best model gets %.1f percent correct!" % best)
+
+    # Return the best parameters.
+    return bestmodel
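Finally, a sketch of training the second network on data that no single line can separate, which is what the hidden layer buys us. The XOR-style toy data and the hyperparameter values are illustrative assumptions:

```python
import numpy as np
import numpy.random as npr

from second_network import build_model, predict
from plotting_routines import plot_decision_boundary

# Hypothetical XOR-style toy data: the class depends on the sign of
# the product of the two coordinates, so the boundary is nonlinear.
npr.seed(123)
x = npr.randn(400, 2)
v = (x[:, 0] * x[:, 1] > 0).astype(int)

# Train a network with 10 hidden nodes and 2 output nodes, then plot
# the decision boundary it has learned.
model = build_model(10, x, v, 0.001, 2, num_steps = 5000,
                    lam = 0.001)
plot_decision_boundary(x, v, model, predict)
```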