| Filename |
| --- |
| code/first_network.py |
| code/mnist_loader.py |
| code/plotting_routines.py |
| code/second_network.py |
diff --git a/code/first_network.py b/code/first_network.py
new file mode 100644
index 0000000..9a06572
--- /dev/null
+++ b/code/first_network.py
@@ -0,0 +1,188 @@
+#
+# Introduction to Neural Networks.
+# Given at SciNet, May 30 2017, by Erik Spence.
+#
+# This file, first_network.py, contains the implementation of our
+# first neural network.
+#
+
+#######################################################################
+
+
+"""
+first_network contains the implementation of our single-node neural
+network.  Use 'build_model' to train the network.
+"""
+
+
+#######################################################################
+
+
+import numpy as np
+import numpy.random as npr
+
+
+#######################################################################
+
+
+def sigma(x, model):
+
+    """
+    Returns the sigmoid function evaluated at z, where z is the
+    weighted sum of the components of x, plus the bias.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, 2), containing the
+      2D position of the data points.  num_points is the number of
+      data points.
+
+    - model: dictionary containing the model parameters.  These model
+      parameters should include:
+
+      - 'w1': float, weight which multiplies the x dimension of the
+        data.
+
+      - 'w2': float, weight which multiplies the y dimension of the
+        data.
+
+      - 'b': float, bias for the network.
+
+    Outputs:
+
+    - vector of floats of length num_points.
+
+    """
+
+    # Calculate z.
+    z = model['w1'] * x[:, 0] + model['w2'] * x[:, 1] + model['b']
+
+    # Return the result.
+    return 1. / (1. + np.exp(-z))
+
+
+#######################################################################
+
+
+# The prediction function.  This function runs the data, in the
+# forward direction, through the neural network.  Though it is
+# obviously redundant for our first neural network, it is included
+# for consistency with the second neural network example.
+def predict(x, model):
+
+    """
+    The predict function runs the data through a forward pass of the
+    neural network, and returns the output.  For our first network
+    this simply means invoking the sigmoid function on the input
+    data.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, 2), containing the
+      2D position of the data points.  num_points is the number of
+      data points.
+
+    - model: dictionary containing the model parameters.
+
+    Outputs:
+
+    - vector of floats of length num_points.
+
+    """
+
+    # Return the sigma function.
+    return sigma(x, model)
+
+
+#######################################################################
+
+
+# Chomsky, Heisenberg and Goedel walk into a bar.
+#
+# Heisenberg says: "I can tell we're in a joke, but I can't tell if
+# it's funny."
+#
+# Goedel says: "We can't tell if it's funny because we're inside the
+# joke."
+#
+# Chomsky says: "The joke's funny, you're just not telling it right."
+
+
+#######################################################################
+
+
+def build_model(x, v, eta = 0.01, num_steps = 10000,
+                print_best = True):
+
+    """
+    This function uses gradient descent to update the neural
+    network's model parameters, minimizing the quadratic cost
+    function.  It returns the best model.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, 2), containing the
+      2D position of the data points.  num_points is the number of
+      data points.
+
+    - v: integer vector of length num_points, containing the correct
+      values (0 or 1) for the data.
+
+    - eta: float, the stepsize parameter for the gradient descent.
+
+    - num_steps: integer, number of steps to iterate through the
+      training data for gradient descent.
+
+    - print_best: boolean, if True, print the model accuracy every
+      1000 iterations.
+
+    Outputs:
+
+    - dictionary containing the parameters of the best model.
+
+    """
+
+    # Initialize the parameters to random values.  We need to learn
+    # these.
+    model = {'w1': npr.random(), 'w2': npr.random(),
+             'b': npr.random()}
+
+    # A scaling factor used in determining the best model.
+    scale = 100. / float(len(v))
+
+    # Initialize the score of our best model.
+    best = 0.0
+
+    # Forward propagation, to initialize f.
+    f = sigma(x, model)
+
+    # Gradient descent.
+    for i in range(0, num_steps):
+
+        # Calculate the derivatives.
+        temp = (f - v) * f * (1 - f)
+        dCdw1 = np.sum(temp * x[:, 0])
+        dCdw2 = np.sum(temp * x[:, 1])
+        dCdb = np.sum(temp)
+
+        # Update the parameters.
+        model['w1'] -= eta * dCdw1
+        model['w2'] -= eta * dCdw2
+        model['b'] -= eta * dCdb
+
+        # Check to see if this is our best model yet.
+        f = sigma(x, model)
+        score = np.sum(np.round(f) == v) * scale
+
+        # Keep the best model.  The parameters here are floats, so a
+        # shallow copy of the dictionary is safe.
+        if (score > best):
+            best, bestmodel = score, model.copy()
+
+        # Optionally print the score.
+        if (print_best) and (i % 1000 == 0):
+            print("Best by step %i: %.1f %%" % (i, best))
+
+    print("Our best model gets %.1f percent correct!" % best)
+
+    # Return the best parameters.
+    return bestmodel
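To make the API concrete, here is a minimal sketch of how first_network might be driven. The two-Gaussian-blob toy data, the random seed, and the hyperparameter values are illustrative assumptions, not part of the committed files:

```python
import numpy as np
import numpy.random as npr

from first_network import build_model, predict
from plotting_routines import plot_decision_boundary

# Hypothetical toy data: two Gaussian blobs, one per class.
npr.seed(42)
x = np.vstack([npr.randn(100, 2) + [2., 2.],
               npr.randn(100, 2) - [2., 2.]])
v = np.concatenate([np.ones(100, dtype = int),
                    np.zeros(100, dtype = int)])

# Train the single-node network, then plot the (linear) decision
# boundary it has learned.
model = build_model(x, v, eta = 0.01, num_steps = 10000)
plot_decision_boundary(x, v, model, predict)
```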
diff --git a/code/mnist_loader.py b/code/mnist_loader.py
new file mode 100644
index 0000000..59d3e71
--- /dev/null
+++ b/code/mnist_loader.py
@@ -0,0 +1,127 @@
+#
+# Introduction to Neural Networks.
+# Given at SciNet, May 30 2017, by Erik Spence.
+#
+# This file, mnist_loader.py, contains the code needed to load the
+# MNIST dataset.  The code borrows heavily from
+# http://neuralnetworksanddeeplearning.com.
+#
+
+#######################################################################
+
+
+"""
+mnist_loader contains the code needed to load the MNIST dataset,
+both 1D and 2D versions.  The code has been heavily borrowed from
+http://neuralnetworksanddeeplearning.com.
+
+"""
+
+
+#######################################################################
+
+
+import gzip
+import pickle
+
+import numpy as np
+
+
+#######################################################################
+
+
+def load_mnist_1D(filename = '../data/mnist.pkl.gz'):
+
+    """
+    Returns the MNIST data as six arrays: the training, validation
+    and test data, each as an array of images and a vector of
+    labels.
+
+    Inputs:
+
+    - filename: string, name of the file containing the data.
+
+    Outputs:
+
+    - tuple of six arrays, containing the training, validation and
+      test data.  These take the form:
+
+      - training images: 2D array of floats of shape (50000, 784),
+        containing the pixel values for each image.
+
+      - training labels: integer vector of length 50000, containing
+        the value of the number in each image.
+
+      - validation images and labels: same as the training data,
+        except of length 10000.
+
+      - test images and labels: same as the training data, except of
+        length 10000.
+
+    """
+
+    # Open the file.
+    f = gzip.open(filename, 'rb')
+
+    # Load the data.  The file was pickled under Python 2, so the
+    # latin1 encoding is needed to unpickle it under Python 3.
+    training_data, validation_data, test_data = \
+        pickle.load(f, encoding = 'latin1')
+
+    # Close the file.
+    f.close()
+
+    # Return the values.
+    return training_data[0], training_data[1], \
+        validation_data[0], validation_data[1], \
+        test_data[0], test_data[1]
+
+
+#######################################################################
+
+
+def load_mnist_2D(filename = '../data/mnist.pkl.gz'):
+
+    """
+    Returns the MNIST data as six arrays: the training, validation
+    and test data, each as an array of images and a vector of
+    labels.  The images are reshaped into 2D form.
+
+    Inputs:
+
+    - filename: string, name of the file containing the data.
+
+    Outputs:
+
+    - tuple of six arrays, containing the training, validation and
+      test data.  These take the form:
+
+      - training images: 4D array of floats of shape
+        (50000, 28, 28, 1), containing the pixel values for each
+        image.
+
+      - training labels: integer vector of length 50000, containing
+        the value of the number in each image.
+
+      - validation images and labels: same as the training data,
+        except of length 10000.
+
+      - test images and labels: same as the training data, except of
+        length 10000.
+
+    """
+
+    # Get the data.
+    tr_d, tr_v, va_d, va_v, te_d, te_v = load_mnist_1D(filename = filename)
+
+    # Reshape the flat images into 28 x 28 x 1 arrays.
+    training_inputs = np.array([x.reshape(28, 28, 1) for x in tr_d])
+    validation_inputs = np.array([x.reshape(28, 28, 1) for x in va_d])
+    test_inputs = np.array([x.reshape(28, 28, 1) for x in te_d])
+
+    # Return the data.
+    return training_inputs, tr_v, validation_inputs, va_v, \
+        test_inputs, te_v
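A quick sketch of the loaders in use, assuming the pickled dataset sits at the default path ../data/mnist.pkl.gz:

```python
from mnist_loader import load_mnist_1D, load_mnist_2D

# Flat 784-pixel images, plus labels, for the three splits.
tr_d, tr_v, va_d, va_v, te_d, te_v = load_mnist_1D()
print(tr_d.shape)   # (50000, 784)
print(tr_v[:10])    # the first ten training labels

# The same images, reshaped for use with 2D (convolutional) networks.
tr_i, _, va_i, _, te_i, _ = load_mnist_2D()
print(tr_i.shape)   # (50000, 28, 28, 1)
```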
diff --git a/code/plotting_routines.py b/code/plotting_routines.py
new file mode 100644
index 0000000..a9b4ad3
--- /dev/null
+++ b/code/plotting_routines.py
@@ -0,0 +1,137 @@
+#
+# Introduction to Neural Networks.
+# Given at SciNet, May 30 2017, by Erik Spence.
+#
+# This file, plotting_routines.py, contains some simple plotting
+# routines.
+#
+
+#######################################################################
+
+
+"""
+plotting_routines.py contains two routines for plotting the class'
+data.
+
+"""
+
+
+#######################################################################
+
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+#######################################################################
+
+
+def plot_dots(x, v, **kwargs):
+
+    """
+    This function will generate a scatter plot of the data, with the
+    colour of the dots indicating the category of the data point.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, 2), containing the
+      2D position of the data points.  num_points is the number of
+      data points.
+
+    - v: integer vector of length num_points, containing the correct
+      values (0 or 1) for the data.
+
+    - kwargs: optionally, the axis limits may be overridden by
+      passing x_min, x_max, y_min and y_max.
+
+    Outputs: nothing returned.
+
+    """
+
+    # Get the number of data points.
+    num_points = len(x[:, 0])
+
+    # Set min and max values and give them some padding.  (This
+    # assumes the data are centred near the origin, so that scaling
+    # by 1.1 pads the limits outward.)
+    if 'x_min' in kwargs: x_min = kwargs['x_min']
+    else: x_min = x[:, 0].min() * 1.1
+
+    if 'x_max' in kwargs: x_max = kwargs['x_max']
+    else: x_max = x[:, 0].max() * 1.1
+
+    if 'y_min' in kwargs: y_min = kwargs['y_min']
+    else: y_min = x[:, 1].min() * 1.1
+
+    if 'y_max' in kwargs: y_max = kwargs['y_max']
+    else: y_max = x[:, 1].max() * 1.1
+
+    # Set the colours based on the v values.
+    cy = np.array(['Orange'] * num_points)
+    cy[v == 1] = 'Blue'
+
+    # Plot the points, and tweak the axes.
+    plt.scatter(x[:, 0], x[:, 1], c = cy, s = 50)
+    plt.xlim(x_min, x_max)
+    plt.ylim(y_min, y_max)
+    plt.show()
+
+
+#######################################################################
+
+
+def plot_decision_boundary(x, v, model, predict_function, **kwargs):
+
+    """
+    This function generates a plot of the model's decision boundary,
+    and then scatter plots the data on top of it.
+
+    This function is heavily based on something I found on the web.
+    Unfortunately, I can't remember where I found it.  Thanks to the
+    author.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, 2), containing the
+      2D position of the data points.  num_points is the number of
+      data points.
+
+    - v: integer vector of length num_points, containing the correct
+      values (0 or 1) for the data.
+
+    - model: dictionary containing the model parameters.
+
+    - predict_function: the function used to run the forward pass of
+      the model.
+
+    - kwargs: optionally, the axis limits may be overridden by
+      passing x_min, x_max, y_min and y_max.
+
+    Outputs: nothing returned.
+
+    """
+
+    # Set min and max values and give them some padding.
+    if 'x_min' in kwargs: x_min = kwargs['x_min']
+    else: x_min = x[:, 0].min() * 1.1
+
+    if 'x_max' in kwargs: x_max = kwargs['x_max']
+    else: x_max = x[:, 0].max() * 1.1
+
+    if 'y_min' in kwargs: y_min = kwargs['y_min']
+    else: y_min = x[:, 1].min() * 1.1
+
+    if 'y_max' in kwargs: y_max = kwargs['y_max']
+    else: y_max = x[:, 1].max() * 1.1
+
+    # The distance between grid points.
+    h = 0.01
+
+    # Generate a grid of points with distance h between them.
+    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
+                         np.arange(y_min, y_max, h))
+
+    # Join the x and y positions.
+    c = np.c_[xx.ravel(), yy.ravel()]
+
+    # Calculate the model values for the whole grid.  Round to the
+    # nearest integer.
+    yp = np.round(predict_function(c, model))
+    yp = yp.reshape(xx.shape)
+
+    # Plot the model contour, with the training data on top of it.
+    # plot_dots calls plt.show(), which displays the combined figure,
+    # so no second call to plt.show() is needed here.  Pass the axis
+    # limits through, so that any overrides also apply to the dots.
+    plt.contourf(xx, yy, yp, cmap = plt.cm.Spectral)
+    plot_dots(x, v, x_min = x_min, x_max = x_max,
+              y_min = y_min, y_max = y_max)
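And a usage sketch for plot_dots on its own, again with hypothetical blob data, this time overriding the automatic axis limits:

```python
import numpy as np
import numpy.random as npr

from plotting_routines import plot_dots

# Hypothetical toy data: two Gaussian blobs, one per class.
npr.seed(0)
x = np.vstack([npr.randn(50, 2) + [1.5, 1.5],
               npr.randn(50, 2) - [1.5, 1.5]])
v = np.concatenate([np.ones(50, dtype = int),
                    np.zeros(50, dtype = int)])

# Scatter the points, with explicit axis limits.
plot_dots(x, v, x_min = -5., x_max = 5., y_min = -5., y_max = 5.)
```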
diff --git a/code/second_network.py b/code/second_network.py
new file mode 100644
index 0000000..f685537
--- /dev/null
+++ b/code/second_network.py
@@ -0,0 +1,276 @@
+#
+# Introduction to Neural Networks.
+# Given at SciNet, May 30 2017, by Erik Spence.
+#
+# This file, second_network.py, contains the implementation of our
+# second neural network.
+#
+
+#######################################################################
+
+
+"""
+second_network contains the implementation of our
+single-hidden-layer neural network.  Use 'build_model' to train the
+network.
+
+"""
+
+
+#######################################################################
+
+
+import numpy as np
+import numpy.random as npr
+
+
+#######################################################################
+
+
+def sigma(z):
+
+    """
+    Returns the sigmoid function evaluated at z.
+
+    Inputs:
+
+    - z: vector of floats.
+
+    Outputs:
+
+    - vector of floats, the same length as z.
+
+    """
+
+    # Return the result.
+    return 1. / (1. + np.exp(-z))
+
+
+#######################################################################
+
+
+def sigmaprime(z):
+
+    """
+    Returns the derivative of the sigmoid function, evaluated at z.
+
+    Inputs:
+
+    - z: vector of floats.
+
+    Outputs:
+
+    - vector of floats, the same length as z.
+
+    """
+
+    # Return the result.
+    return sigma(z) * (1.0 - sigma(z))
+
+
+#######################################################################
+
+
+def forward(x, model):
+
+    """
+    This function runs a forward pass of the data through the neural
+    network, and returns the values which were calculated along the
+    way.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, input_dim),
+      containing the data to be input to the network.  num_points is
+      the number of data points.  input_dim is the dimension of the
+      input data.
+
+    - model: dictionary containing the model parameters.  These model
+      parameters should include:
+
+      - 'w1': 2D array of floats of shape (num_nodes, input_dim).
+        These are the weights for the hidden layer.
+
+      - 'b1': 2D array of floats of shape (num_nodes, 1).  These are
+        the biases for the hidden layer.  The superfluous extra
+        dimension is needed so that the biases can be seamlessly
+        added to the weights-data product.
+
+      - 'w2': 2D array of floats of shape (output_dim, num_nodes).
+        These are the weights for the output layer.
+
+      - 'b2': 2D array of floats of shape (output_dim, 1).  These are
+        the biases for the output layer.
+
+    Outputs:
+
+    - z1, z2, a1, a2, as a tuple.  These are:
+
+      - z1: 2D array of floats of shape (num_nodes, num_points),
+        containing the value of the variable z to be input to the
+        hidden layer.  num_nodes is the number of nodes in the
+        hidden layer.
+
+      - z2: 2D array of floats of shape (output_dim, num_points),
+        containing the value of the variable z to be input to the
+        output layer.  output_dim is the output dimension of the
+        network.
+
+      - a1: 2D array of floats of shape (num_nodes, num_points),
+        containing the output of the hidden layer.
+
+      - a2: 2D array of floats of shape (output_dim, num_points),
+        containing the output of the output layer.
+
+    """
+
+    # Forward propagation through the network.
+    # First the hidden layer.
+    z1 = model['w1'].dot(x.T) + model['b1']
+    a1 = sigma(z1)
+
+    # Then the output layer.
+    z2 = model['w2'].dot(a1) + model['b2']
+    a2 = sigma(z2)
+
+    return z1, z2, a1, a2
+
+
+#######################################################################
+
+
+def predict(x, model):
+
+    """
+    The predict function runs the data through a forward pass of the
+    neural network, and returns the output.  For our second network
+    this means calculating the variable a2, and taking the index of
+    the maximum output value for each data point.
+
+    Inputs:
+
+    - x: 2D array of floats of shape (num_points, input_dim),
+      containing the data to be input to the network.
+
+    - model: dictionary containing the model parameters.
+
+    Outputs:
+
+    - integer vector of length num_points.
+
+    """
+
+    # Run the data through the network, but we're only interested in
+    # the output.
+    _, _, _, a2 = forward(x, model)
+
+    # Get the index of the maximum value for each data point, and
+    # return it.
+    return np.argmax(a2, axis = 0)
+
+
+#######################################################################
+
+
+# The first rule of Thesaurus Club is: do not discuss, confer about,
+# descant, confabulate, converse about or mention Thesaurus Club.
+
+
+#######################################################################
+
+
+def build_model(num_nodes, x, v, eta, output_dim, num_steps = 10000,
+                print_best = True, lam = 0.0):
+
+    """
+    This function uses gradient descent to update the neural
+    network's model parameters, minimizing the quadratic cost
+    function.  It returns the best model.
+
+    Inputs:
+
+    - num_nodes: integer, number of nodes in the hidden layer.
+
+    - x: 2D array of floats of shape (num_points, input_dim),
+      containing the input data.
+
+    - v: integer vector of length num_points, containing the correct
+      values (0 to output_dim - 1) for the data.
+
+    - eta: float, the stepsize parameter for the gradient descent.
+
+    - output_dim: integer, number of nodes in the output layer.
+
+    - num_steps: integer, number of steps to iterate through the
+      training data for gradient descent.
+
+    - print_best: boolean, if True, print the model accuracy every
+      1000 iterations.
+
+    - lam: float, regularization parameter.
+
+    Outputs:
+
+    - dictionary containing the parameters of the best model.
+
+    """
+
+    # Get the input dimension of the data.
+    input_dim = np.shape(x)[1]
+
+    # Initialize the parameters to random values.  We need to learn
+    # these.
+    model = {'w1': npr.randn(num_nodes, input_dim),
+             'b1': np.zeros([num_nodes, 1]),
+             'w2': npr.randn(output_dim, num_nodes),
+             'b2': np.zeros([output_dim, 1])}
+
+    # A scaling factor used in determining the best model.
+    scale = 100. / float(len(v))
+
+    # Initialize the score of our best model.
+    best = 0.0
+
+    # Forward propagation.
+    z1, _, a1, a2 = forward(x, model)
+
+    # Gradient descent.
+    for i in range(0, num_steps):
+
+        # Backpropagation.  Copy a2, so that we don't modify it in
+        # place.
+        delta2 = a2.copy()
+
+        # Here we subtract v, which is just 1, but only at the entry
+        # for the correct class of each data point.  This is the
+        # error in the final output (how wrong is it?).  (We should
+        # similarly subtract 0 from the other entries, but of course
+        # this would not do anything.)
+        delta2[v, np.arange(len(v))] -= 1
+        delta1 = (model['w2'].T).dot(delta2) * sigmaprime(z1)
+
+        # Calculate the derivatives.
+        dCdb2 = np.sum(delta2, axis = 1, keepdims = True)
+        dCdb1 = np.sum(delta1, axis = 1, keepdims = True)
+
+        dCdw2 = delta2.dot(a1.T)
+        dCdw1 = delta1.dot(x)
+
+        # Gradient descent parameter update, with regularization.
+        model['w1'] -= eta * (lam * model['w1'] + dCdw1)
+        model['b1'] -= eta * dCdb1
+        model['w2'] -= eta * (lam * model['w2'] + dCdw2)
+        model['b2'] -= eta * dCdb2
+
+        # Check to see if this is our best model yet.
+        z1, _, a1, a2 = forward(x, model)
+        score = np.sum(np.argmax(a2, axis = 0) == v) * scale
+
+        # Keep the best model.  The parameter arrays must be copied
+        # explicitly: a shallow dictionary copy would share them with
+        # the model, which is updated in place by the -= operations
+        # above.
+        if (score > best):
+            best = score
+            bestmodel = {key: value.copy()
+                         for key, value in model.items()}
+
+        # Optionally print the score.
+        if (print_best) and (i % 1000 == 0):
+            print("Best by step %i: %.1f %%" % (i, best))
+
+    print("Our best model gets %.1f percent correct!" % best)
+
+    # Return the best parameters.
+    return bestmodel
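Finally, a sketch of training the second network on data that no single line can separate, which is what the hidden layer buys us. The XOR-style toy data and the hyperparameter values are illustrative assumptions:

```python
import numpy as np
import numpy.random as npr

from second_network import build_model, predict
from plotting_routines import plot_decision_boundary

# Hypothetical XOR-style toy data: the class depends on the sign of
# the product of the two coordinates, so the boundary is nonlinear.
npr.seed(123)
x = npr.randn(400, 2)
v = (x[:, 0] * x[:, 1] > 0).astype(int)

# Train a network with 10 hidden nodes and 2 output nodes, then plot
# the decision boundary it has learned.
model = build_model(10, x, v, 0.001, 2, num_steps = 5000,
                    lam = 0.001)
plot_decision_boundary(x, v, model, predict)
```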