Neural network implementation using Python + NumPy


I have written code implementing a simple neural network using Python + NumPy, optimized with fmin_cg from SciPy. I think there is an issue with my gradient function; I have tried a smaller network as well, but I cannot figure out what is wrong. When I run the optimization it does not converge. I have looked at many similar implementations and cannot find a meaningful difference. I would appreciate any help. Thanks in advance!

import numpy as np
from numpy import linalg as la
import time
from scipy import optimize

def sigmoid(x):
    return (1 / (1 + np.exp(-x)))

def pack_thetas(t1, t2):
    return np.concatenate((t1.reshape(-1), t2.reshape(-1)))

def unpack_thetas(thetas, input_layer_size, hidden_layer_size, num_labels):
    t1_start = 0
    t1_end = hidden_layer_size * (input_layer_size + 1)
    t1 = thetas[t1_start:t1_end].reshape((hidden_layer_size, input_layer_size + 1))
    t2 = thetas[t1_end:].reshape((num_labels, hidden_layer_size + 1))
    return t1, t2

def cost(params, x, y, lambdap, n, h):
    cost = 0.0
    m, num_labels = np.shape(y)
    '''unroll thetas'''
    t1, t2 = unpack_thetas(params, n, h, num_labels)
    a2 = sigmoid(x.dot(t1.T))
    x2 = np.concatenate((np.ones((m, 1)), a2), axis=1)
    yi = sigmoid(x2.dot(t2.T))

    for i in range(m):
        for j in range(num_labels):
            yout = yi[i, j].copy()
            yin = y[i, j].copy()
            cost -= yin * np.log(yout) + (1 - yin) * np.log(1 - yout)

    cost += (sum(sum(t1[:, 1:] * t1[:, 1:])) + sum(sum(t2[:, 1:] * t2[:, 1:]))) * lambdap / 2
    '''first column excluded because bias weights are not regularized'''
    return (cost / m)

def grad(params, x, y, lambdap, n, h):
    m, num_labels = np.shape(y)

    '''unroll thetas'''
    t1, t2 = unpack_thetas(params, n, h, num_labels)
    #t1 = np.reshape(params[0:h*(n+1)], (h, n+1))
    #t2 = np.reshape(params[h*(n+1):], (num_labels, h+1))
    t1g = np.zeros(np.shape(t1))
    t2g = np.zeros(np.shape(t2))

    for i in range(m):
        a1 = np.reshape(x[i, :], (1, n + 1))
        z2 = np.dot(a1, t1.T)
        a2 = np.concatenate((np.ones((1, 1)), sigmoid(z2)), axis=1)
        yout = sigmoid(np.dot(a2, t2.T))
        error3 = yout - y[i, :]
        gz2 = sigmoid(z2) * (1 - sigmoid(z2))
        error2 = np.dot(t2[:, 1:].T, error3.T) * (gz2.T)
        t2g = t2g + np.dot(error3.T, a2)
        t1g = t1g + np.dot(error2, a1)

    t1g[:, 1:] = (1 / m) * t1g[:, 1:] + (lambdap / m) * t1[:, 1:]
    t2g[:, 1:] = (1 / m) * t2g[:, 1:] + (lambdap / m) * t2[:, 1:]

    gradient = pack_thetas(t1g, t2g)
    return (gradient.flatten())


x = np.array(np.loadtxt("x.txt"))
y = np.array(np.loadtxt("y.txt"))
m, n = np.shape(x)

# adding x0 to x
xb = np.concatenate((np.ones((m, 1)), x), axis=1)

# one-hot encoding y
yb = np.zeros((m, 10), dtype="f")
for i in range(m):
    yb[i, y[i] - 1] = 1

print("done loading data...")

h = 25
num_labels = 10
epsilon = 0.12
lambdap = 1

theta1 = np.random.random((h, n + 1)) * 2 * epsilon - epsilon
theta2 = np.random.random((num_labels, h + 1)) * 2 * epsilon - epsilon
params = pack_thetas(theta1, theta2)
#params = np.concatenate((np.reshape(theta1, ((h*(n+1)), 1)), np.reshape(theta2, ((num_labels*(h+1)), 1))), axis=0)

# cost @ initial thetas
print(cost(params, xb, yb, 0.0, n, h))
print(cost(params, xb, yb, lambdap, n, h))
optiparams = params.flatten()
optiparams = optimize.fmin_cg(cost, optiparams, fprime=grad, args=(xb, yb, lambdap, n, h), maxiter=100)

#print("cost @ optimum = " + cost(optiparams, xb, yb, lambdap, n, h))
accuracy = 0
t1, t2 = unpack_thetas(optiparams, n, h, num_labels)

a1 = np.reshape(xb, (1, n + 1))
a2 = sigmoid(a1.dot(t1.T))
x2 = np.concatenate((np.ones((m, 1)), a2), axis=1)
yi = sigmoid(x2.dot(t2.T))

for i in range(m):
    maxp = np.max(yi[i], axis=0)
    for j in range(num_labels):
        if yi[i, j] == maxp:
            if yb[i, j] == 1:
                accuracy += 1

print(accuracy)
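One way to narrow down whether grad is the problem is to compare it against a finite-difference approximation of cost on a tiny synthetic problem before running fmin_cg. Below is a minimal sketch of such a check; it assumes the cost, grad, and pack_thetas definitions above, and the problem sizes and random data are arbitrary choices for illustration only.

# minimal gradient-check sketch (uses cost/grad/pack_thetas defined above)
import numpy as np
from scipy import optimize

np.random.seed(0)

# tiny synthetic problem: 5 examples, 3 input features, 4 hidden units, 2 labels
m_chk, n_chk, h_chk, labels_chk = 5, 3, 4, 2
x_chk = np.concatenate((np.ones((m_chk, 1)), np.random.randn(m_chk, n_chk)), axis=1)
y_chk = np.zeros((m_chk, labels_chk))
y_chk[np.arange(m_chk), np.random.randint(labels_chk, size=m_chk)] = 1

t1_chk = np.random.randn(h_chk, n_chk + 1) * 0.1
t2_chk = np.random.randn(labels_chk, h_chk + 1) * 0.1
p_chk = pack_thetas(t1_chk, t2_chk)

# check_grad returns the 2-norm of the difference between the analytic and
# numerical gradients; for a problem this small it should be very small
# (on the order of 1e-6 or less) if grad is consistent with cost
err = optimize.check_grad(cost, grad, p_chk, x_chk, y_chk, 0.0, n_chk, h_chk)
print("gradient check error:", err)

If the reported error is large, the discrepancy is in the backpropagation code rather than in the optimizer call.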

