Building a Deep Neural Network in Python (Continued)
This article builds on the previous post, Building a Deep Neural Network (DNN) in Python, and adds the following:
1) A regularization term
2) Monitoring the intermediate values of the cost function during training
3) A cross-entropy cost function (the regularized cost is sketched right after this list)
4) Saving the trained network and loading it to test new data
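For reference, the regularized cost that the code in section 2 implements (this is simply read off total_cost and update_minbatch_parameter, not an extra derivation from the original post) is the cross-entropy averaged over the n training samples plus an L2 penalty on all weights; note the penalty uses $\lambda/n$ here rather than the more common $\lambda/2n$:

$$C = \frac{1}{n}\sum_{x}\sum_{j}\Big[-y_j \ln a_j - (1 - y_j)\ln(1 - a_j)\Big] + \frac{\lambda}{n}\sum_{w}\lVert w\rVert^2$$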
1 Data Preprocessing
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time: 2017-03-12 15:11
# @Author: CC
# @File: net_load_data.py
from numpy import *
import numpy as np
import cPickle


def load_data():
    """Load the unpacked MNIST data and read it."""
    with open('data/mnist_pkl/mnist.pkl', 'rb') as f:
        try:
            train_data, validation_data, test_data = cPickle.load(f)
            print "the file opened successfully"
            # print train_data[0].shape   # (50000, 784)
            # print train_data[1].shape   # (50000,)
            return (train_data, validation_data, test_data)
        except EOFError:
            print 'the file open error'
            return None


def data_transform():
    """Convert the raw data into the format used for training."""
    t_d, va_d, te_d = load_data()
    # print t_d[0].shape    # (50000, 784)
    # print te_d[0].shape   # (10000, 784)
    # print va_d[0].shape   # (10000, 784)
    # n1 = [np.reshape(x, 784, 1) for x in t_d[0]]   # commented-out variant: np.reshape(x, 784, 1) is not the same as (784, 1)
    n = [np.reshape(x, (784, 1)) for x in t_d[0]]    # take the 50,000 samples one by one and reshape each into (784, 1)
    # print 'n1', n1[0].shape
    # print 'n', n[0].shape
    m = [vectors(y) for y in t_d[1]]     # turn each of the 50,000 labels into a (10, 1) one-hot vector
    train_data = zip(n, m)               # pack samples and labels together as tuples
    n = [np.reshape(x, (784, 1)) for x in va_d[0]]   # reshape each validation sample into (784, 1)
    validation_data = zip(n, va_d[1])    # validation labels are left as scalars
    n = [np.reshape(x, (784, 1)) for x in te_d[0]]   # reshape each test sample into (784, 1)
    test_data = zip(n, te_d[1])          # test labels are left as scalars
    # print train_data[0][0].shape                              # (784,)
    # print "len(train_data[0])", len(train_data[0])            # 2
    # print "len(train_data[100])", len(train_data[100])        # 2
    # print "len(train_data[0][0])", len(train_data[0][0])      # 784
    # print "train_data[0][0].shape", train_data[0][0].shape    # (784, 1)
    # print "len(train_data)", len(train_data)                  # 50000
    # print train_data[0][1].shape                              # (10, 1)
    # print test_data[0][1]                                     # 7
    return (train_data, validation_data, test_data)


def vectors(y):
    """Turn a digit label into a one-hot column vector."""
    label = np.zeros((10, 1))
    label[y] = 1.0   # float, so later arithmetic stays in floating point
    return label
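A quick sanity check of what data_transform returns (a sketch, assuming mnist.pkl sits under data/mnist_pkl/ as load_data expects; the expected values match the commented prints above):

import net_load_data

train_data, validation_data, test_data = net_load_data.data_transform()
print len(train_data), len(validation_data), len(test_data)   # 50000 10000 10000
x, y = train_data[0]
print x.shape, y.shape   # (784, 1) (10, 1): image column vector and one-hot label
x, y = test_data[0]
print x.shape, y         # (784, 1) and a scalar label, e.g. 7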
2 Network Definition and Training
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time: 2017-03-28 10:18
# @Author: CC
# @File: net_network2.py
from numpy import *
import numpy as np
import operator
import json
# import sys


class QuadraticCost():
    """Define the quadratic cost and its methods."""
    @staticmethod
    def fn(a, y):
        cost = 0.5 * np.linalg.norm(a - y) ** 2
        return cost

    @staticmethod
    def delta(z, a, y):
        delta = (a - y) * sig_derivate(z)
        return delta


class CrossEntroyCost():
    """Define the cross-entropy cost and its methods."""
    @staticmethod
    def fn(a, y):
        # nan_to_num: nan ---> 0, inf ---> a very large number
        cost = np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))
        return cost

    @staticmethod
    def delta(z, a, y):
        delta = (a - y)
        return delta

class Network(object):
    """Define the network structure and its methods."""
    def __init__(self, sizes, cost):
        self.num_layer = len(sizes)
        self.sizes = sizes
        self.cost = cost
        # print "self.cost.__name__:", self.cost.__name__   # CrossEntroyCost
        self.default_weight_initializer()

    def default_weight_initializer(self):
        """Weight initialization, scaled by 1/sqrt(fan-in)."""
        self.bias = [np.random.rand(x, 1) for x in self.sizes[1:]]
        self.weight = [np.random.randn(y, x) / float(np.sqrt(x))
                       for (x, y) in zip(self.sizes[:-1], self.sizes[1:])]

    def large_weight_initializer(self):
        """An alternative initialization with unscaled random weights."""
        self.bias = [np.random.rand(x, 1) for x in self.sizes[1:]]
        self.weight = [np.random.randn(y, x) for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def forward(self, a):
        """Forward the input a through the network."""
        for w, b in zip(self.weight, self.bias):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, train_data, min_batch_size, epochs, eta, test_data=False,
            lambd=0,
            monitor_train_cost=False,
            monitor_train_accuracy=False,
            monitor_test_cost=False,
            monitor_test_accuracy=False
            ):
        """1) take train_data and shuffle it;
        2) loop over the epochs;
        3) split into mini-batches and apply the update rule."""
        if test_data: n_test = len(test_data)
        n = len(train_data)
        for i in xrange(epochs):
            random.shuffle(train_data)
            min_batches = [train_data[k:k + min_batch_size] for k in xrange(0, n, min_batch_size)]
            for min_batch in min_batches:   # take one mini-batch of samples at a time
                self.update_minbatch_parameter(min_batch, eta, lambd, n)
            train_cost = []
            if monitor_train_cost:
                cost1 = self.total_cost(train_data, lambd, cont=False)
                train_cost.append(cost1)
                print "epoch {0}, train_cost: {1}".format(i, cost1)
            train_accuracy = []
            if monitor_train_accuracy:
                accuracy = self.accuracy(train_data, cont=True)
                train_accuracy.append(accuracy)
                print "epoch {0}/{1}, train_accuracy: {2}".format(i, epochs, accuracy)
            test_cost = []
            if monitor_test_cost:
                cost1 = self.total_cost(test_data, lambd)
                test_cost.append(cost1)
                print "epoch {0}, test_cost: {1}".format(i, cost1)
            test_accuracy = []
            if monitor_test_accuracy:
                accuracy = self.accuracy(test_data)
                test_accuracy.append(accuracy)
                print "epoch: {0}/{1}, test_accuracy: {2}".format(i, epochs, accuracy)
        self.save(filename="net_save")   # save the trained network parameters

    def total_cost(self, train_data, lambd, cont=True):
        cost1 = 0.0
        for x, y in train_data:
            a = self.forward(x)
            if cont: y = vectors(y)   # convert scalar test labels into one-hot column vectors
            cost1 += (self.cost).fn(a, y) / len(train_data)
        cost1 += lambd / len(train_data) * sum(np.linalg.norm(weight) ** 2 for weight in self.weight)   # add the L2 weight penalty
        return cost1

    def accuracy(self, train_data, cont=False):
        if cont:
            output1 = [(np.argmax(self.forward(x)), np.argmax(y)) for (x, y) in train_data]
        else:
            output1 = [(np.argmax(self.forward(x)), y) for (x, y) in train_data]
        return sum(int(out1 == y) for (out1, y) in output1)

    def update_minbatch_parameter(self, min_batch, eta, lambd, n):
        """1) allocate accumulators for the weight and bias gradients
        2) backpropagate to get the deltas
        3) update the parameters"""
        able_b = [np.zeros(b.shape) for b in self.bias]
        able_w = [np.zeros(w.shape) for w in self.weight]
        for x, y in min_batch:   # take one sample at a time
            deltab, deltaw = self.backprop(x, y)
            able_b = [a_b + dab for a_b, dab in zip(able_b, deltab)]   # accumulate db over the mini-batch; averaged in the update below
            able_w = [a_w + daw for a_w, daw in zip(able_w, deltaw)]
        self.weight = [weight - eta * dw / len(min_batch) - eta * (lambd * weight) / n
                       for weight, dw in zip(self.weight, able_w)]
        # the added regularization term: eta*lambd/n*weight
        self.bias = [bias - eta * db / len(min_batch) for bias, db in zip(self.bias, able_b)]

    def backprop(self, x, y):
        """1) compute the forward-pass values
        2) compute the output delta via the cost's delta rule, then propagate it back: delta_k = w_{k+1}^T . delta_{k+1} * f'(z_k)
        3) per-layer gradients: deltab = delta, deltaw = delta . a_{k-1}^T"""
        deltab = [np.zeros(b.shape) for b in self.bias]
        deltaw = [np.zeros(w.shape) for w in self.weight]
        zs = []
        activate = x
        activates = [x]
        for w, b in zip(self.weight, self.bias):
            z = np.dot(w, activate) + b
            zs.append(z)
            activate = sigmoid(z)
            activates.append(activate)
        # backward pass
        delta = self.cost.delta(zs[-1], activates[-1], y)   # use the delta rule of whichever cost class was chosen
        deltab[-1] = delta
        deltaw[-1] = np.dot(delta, activates[-2].transpose())
        for i in xrange(2, self.num_layer):
            z = zs[-i]
            delta = np.dot(self.weight[-i + 1].transpose(), delta) * sig_derivate(z)
            deltab[-i] = delta
            deltaw[-i] = np.dot(delta, activates[-i - 1].transpose())
        return (deltab, deltaw)

    def save(self, filename):
        """Save the trained network as a JSON (JavaScript Object Notation) string, for later deployment.
        encoder = json.dumps(data)
        Conversion table from native Python types (there is no array type) to JSON types:
            Python               JSON
            dict                 object
            list / tuple         array
            int / long / float   number
        .tolist() converts a numpy array into nested lists:
        >>> a = np.array([[1, 2], [3, 4]])
        >>> list(a)
        [array([1, 2]), array([3, 4])]
        >>> a.tolist()
        [[1, 2], [3, 4]]
        """
        data = {"sizes": self.sizes,
                "weight": [weight.tolist() for weight in self.weight],
                "bias": [bias.tolist() for bias in self.bias],
                "cost": str(self.cost.__name__)}
        # save the trained weights, biases and the name of the cost function
        f = open(filename, "w")
        json.dump(data, f)
        f.close()


def load_net(filename):
    """Decode with data = json.load(f):
    decoder = json.load(encoder)
    After encoding and decoding, the keys may not keep their original order, but each key keeps its value.
    Load a trained network for testing."""
    f = open(filename, "r")
    data = json.load(f)
    f.close()
    # print "data[cost]", getattr(sys.modules[__name__], data["cost"])   # would fetch the cost class attribute, e.g. __main__.CrossEntroyCost
    # print "data[cost]", data["cost"], data["sizes"]
    net = Network(data["sizes"], cost=data["cost"])   # re-initialize the network (cost stays a string here, which is enough for forward tests)
    net.weight = [np.array(w) for w in data["weight"]]   # assign the trained weights, list ---> array
    net.bias = [np.array(b) for b in data["bias"]]
    return net

def sig_derivate(z):
    """Derivative of the sigmoid."""
    return sigmoid(z) * (1 - sigmoid(z))


def sigmoid(x):
    sigm = 1.0 / (1.0 + np.exp(-x))
    return sigm


def vectors(y):
    """Turn a digit label into a one-hot column vector."""
    label = np.zeros((10, 1))
    label[y] = 1.0   # float, so later arithmetic stays in floating point
    return label
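A brief note on the two delta methods above (the standard derivation for a sigmoid output layer, not spelled out in the original post): for the quadratic cost $C = \tfrac{1}{2}\lVert a - y\rVert^2$ the output-layer error is

$$\delta = \nabla_a C \odot \sigma'(z) = (a - y)\odot\sigma'(z),$$

while for the cross-entropy cost $\partial C/\partial a_j = (a_j - y_j)/\big(a_j(1 - a_j)\big)$ and $\sigma'(z_j) = a_j(1 - a_j)$, so the $\sigma'(z)$ factor cancels and

$$\delta = a - y.$$

This is why CrossEntroyCost.delta does not multiply by sig_derivate(z), and why the cross-entropy cost avoids the learning slowdown of saturated output neurons.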
3 Network Testing
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time: 2017-03-12 15:24
# @Author: CC
# @File: net_test.py
import net_load_data
# net_load_data.load_data()
train_data, validation_data, test_data = net_load_data.data_transform()
import net_network2 as net

cost = net.QuadraticCost
cost = net.CrossEntroyCost   # the second assignment wins: train with the cross-entropy cost
lambd = 0
net1 = net.Network([784, 50, 10], cost)
min_batch_size = 30
eta = 3.0
epoches = 2
net1.SGD(train_data, min_batch_size, epoches, eta, test_data,
         lambd,
         monitor_train_cost=True,
         monitor_train_accuracy=True,
         monitor_test_cost=True,
         monitor_test_accuracy=True
         )
print "complete"
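After SGD finishes, save() writes the parameters to the file net_save as JSON. A small sketch (assuming the training script above has already been run in the same directory) to peek at what was stored:

import json

with open("net_save", "r") as f:
    data = json.load(f)
print data["sizes"]                            # [784, 50, 10]
print data["cost"]                             # CrossEntroyCost
print len(data["weight"]), len(data["bias"])   # 2 weight matrices and 2 bias vectors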
4 Loading the Trained Network for Testing
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time: 2017-03-28 17:27
# @Author: CC
# @File: forward_test.py
import numpy as np
# load the trained network and run it directly on the test samples
import net_load_data          # provides the test data
import net_network2 as net

train_data, validation_data, test_data = net_load_data.data_transform()
net = net.load_net(filename="net_save")                              # load the saved network
output = [(np.argmax(net.forward(x)), y) for (x, y) in test_data]    # predict each test sample
print sum(int(y1 == y2) for (y1, y2) in output)                      # number of correctly classified samples
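If a percentage is more convenient than a raw count, the same output list can be reduced as follows (a small extension, not part of the original script):

correct = sum(int(y1 == y2) for (y1, y2) in output)
print "test accuracy: %.2f%% (%d / %d)" % (100.0 * correct / len(test_data), correct, len(test_data))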