Neural network implementation using Python + NumPy
I have written code that implements a simple neural network using Python + NumPy and optimizes it with SciPy's fmin_cg. I think there is an issue in my gradient function; I have also tried a smaller network, but I cannot figure out what is wrong. When I try to optimize, it does not converge. I have looked at many similar implementations and cannot find a meaningful difference. I would appreciate any help, folks. Thanks in advance!
import numpy as np
from numpy import linalg as la
import time
from scipy import optimize


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def pack_thetas(t1, t2):
    # roll both weight matrices into a single 1-D parameter vector
    return np.concatenate((t1.reshape(-1), t2.reshape(-1)))


def unpack_thetas(thetas, input_layer_size, hidden_layer_size, num_labels):
    # recover the two weight matrices from the flat parameter vector
    t1_start = 0
    t1_end = hidden_layer_size * (input_layer_size + 1)
    t1 = thetas[t1_start:t1_end].reshape((hidden_layer_size, input_layer_size + 1))
    t2 = thetas[t1_end:].reshape((num_labels, hidden_layer_size + 1))
    return t1, t2


def cost(params, x, y, lambdap, n, h):
    cost = 0.0
    m, num_labels = np.shape(y)
    # unroll thetas
    t1, t2 = unpack_thetas(params, n, h, num_labels)
    # forward pass (x already contains the bias column)
    a2 = sigmoid(x.dot(t1.T))
    x2 = np.concatenate((np.ones((m, 1)), a2), axis=1)
    yi = sigmoid(x2.dot(t2.T))
    # cross-entropy over all examples and output units
    for i in range(m):
        for j in range(num_labels):
            yout = yi[i, j].copy()
            yin = y[i, j].copy()
            cost -= yin * np.log(yout) + (1 - yin) * np.log(1 - yout)
    # regularization: the first column (bias weights) is not regularized
    cost += (sum(sum(t1[:, 1:] * t1[:, 1:])) + sum(sum(t2[:, 1:] * t2[:, 1:]))) * lambdap / 2
    return cost / m


def grad(params, x, y, lambdap, n, h):
    m, num_labels = np.shape(y)
    # unroll thetas
    t1, t2 = unpack_thetas(params, n, h, num_labels)
    #t1 = np.reshape(params[0:h*(n+1)], (h, n+1))
    #t2 = np.reshape(params[h*(n+1):], (num_labels, h+1))
    t1g = np.zeros(np.shape(t1))
    t2g = np.zeros(np.shape(t2))
    # backpropagation, one training example at a time
    for i in range(m):
        a1 = np.reshape(x[i, :], (1, n + 1))
        z2 = np.dot(a1, t1.T)
        a2 = np.concatenate((np.ones((1, 1)), sigmoid(z2)), axis=1)
        yout = sigmoid(np.dot(a2, t2.T))
        error3 = yout - y[i, :]
        gz2 = sigmoid(z2) * (1 - sigmoid(z2))
        error2 = np.dot(t2[:, 1:].T, error3.T) * gz2.T
        t2g = t2g + np.dot(error3.T, a2)
        t1g = t1g + np.dot(error2, a1)
    # average and add regularization (bias column excluded)
    t1g[:, 1:] = (1 / m) * t1g[:, 1:] + (lambdap / m) * t1[:, 1:]
    t2g[:, 1:] = (1 / m) * t2g[:, 1:] + (lambdap / m) * t2[:, 1:]
    gradient = pack_thetas(t1g, t2g)
    return gradient.flatten()


x = np.array(np.loadtxt("x.txt"))
y = np.array(np.loadtxt("y.txt"))
m, n = np.shape(x)

# adding x0 to x
xb = np.concatenate((np.ones((m, 1)), x), axis=1)

# one-hot encoding y
yb = np.zeros((m, 10), dtype="f")
for i in range(m):
    yb[i, int(y[i]) - 1] = 1
print("done loading data...")

h = 25
num_labels = 10
epsilon = 0.12
lambdap = 1

# random initialization in [-epsilon, epsilon]
theta1 = np.random.random((h, n + 1)) * 2 * epsilon - epsilon
theta2 = np.random.random((num_labels, h + 1)) * 2 * epsilon - epsilon
params = pack_thetas(theta1, theta2)
#params = np.concatenate((np.reshape(theta1, ((h*(n+1)), 1)), np.reshape(theta2, ((num_labels*(h+1)), 1))), axis=0)

# cost at the initial thetas, without and with regularization
print(cost(params, xb, yb, 0.0, n, h))
print(cost(params, xb, yb, lambdap, n, h))

optiparams = params.flatten()
optiparams = optimize.fmin_cg(cost, optiparams, fprime=grad,
                              args=(xb, yb, lambdap, n, h), maxiter=100)
#print("cost @ optimum = " + cost(optiparams, xb, yb, lambdap, n, h))

# training-set accuracy with the optimized weights
accuracy = 0
t1, t2 = unpack_thetas(optiparams, n, h, num_labels)
a1 = xb
a2 = sigmoid(a1.dot(t1.T))
x2 = np.concatenate((np.ones((m, 1)), a2), axis=1)
yi = sigmoid(x2.dot(t2.T))
for i in range(m):
    maxp = np.max(yi[i], axis=0)
    for j in range(num_labels):
        if yi[i, j] == maxp:
            if yb[i, j] == 1:
                accuracy += 1
print(accuracy)
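Since the suspicion is the gradient, a finite-difference check against the cost function is a quick way to confirm or rule that out. The following is only a sketch layered on the cost, grad, pack_thetas functions and epsilon value above; numerical_gradient and the *_check names/sizes are my own arbitrary placeholders, not part of the original code or data.

def numerical_gradient(f, params, eps=1e-4):
    # central finite differences, one parameter at a time (slow but simple)
    num_grad = np.zeros_like(params)
    for k in range(params.size):
        step = np.zeros_like(params)
        step[k] = eps
        num_grad[k] = (f(params + step) - f(params - step)) / (2 * eps)
    return num_grad

# tiny random problem so the check stays cheap (sizes are arbitrary)
n_check, h_check, labels_check, m_check = 3, 5, 4, 10
x_check = np.concatenate((np.ones((m_check, 1)),
                          np.random.random((m_check, n_check))), axis=1)
y_check = np.eye(labels_check)[np.random.randint(labels_check, size=m_check)]
t1_check = np.random.random((h_check, n_check + 1)) * 2 * epsilon - epsilon
t2_check = np.random.random((labels_check, h_check + 1)) * 2 * epsilon - epsilon
p_check = pack_thetas(t1_check, t2_check)

analytic = grad(p_check, x_check, y_check, 1.0, n_check, h_check)
numeric = numerical_gradient(lambda p: cost(p, x_check, y_check, 1.0, n_check, h_check),
                             p_check)
# the relative difference should be tiny (around 1e-9) if grad matches cost
print(la.norm(analytic - numeric) / la.norm(analytic + numeric))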
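As a side note, the accuracy loop at the end could also be expressed with np.argmax instead of the nested comparisons. A small sketch, assuming yi and yb as computed above:

pred = np.argmax(yi, axis=1)    # predicted class index per example
truth = np.argmax(yb, axis=1)   # true class index from the one-hot labels
print(np.sum(pred == truth))    # number of correctly classified examples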