Example 5: Using a session
# -*- coding: utf-8 -*-
import tensorflow as tf
hello = tf.constant('Hello, TensorFlow!')  # define a constant
sess = tf.Session()                        # create a session
print(sess.run(hello))                     # evaluate the tensor through session.run
sess.close()                               # close the session
b'Hello, TensorFlow!'
Example 6: Using a session in a with block
# -*- coding: utf-8 -*-
import tensorflow as tf
a = tf.constant(3)  # define constant 3
b = tf.constant(4)  # define constant 4
with tf.Session() as sess:  # create a session; it closes automatically on exit
    print("add: %i" % sess.run(a + b))
    print("multiply: %i" % sess.run(a * b))
add: 7
multiply: 12
Example 7: Injecting values into placeholders with feed_dict
# -*- coding: utf-8 -*-
import tensorflow as tf
a = tf.placeholder(tf.int16)
b = tf.placeholder(tf.int16)
add = tf.add(a, b)
mul = tf.multiply(a, b)  # multiply a by b
with tf.Session() as sess:
    # run every operation with variable input
    print("add: %i" % sess.run(add, feed_dict={a: 3, b: 4}))
    print("multiply: %i" % sess.run(mul, feed_dict={a: 3, b: 4}))
    # fetch both tensors in a single run call
    print(sess.run([add, mul], feed_dict={a: 3, b: 4}))
add: 7
multiply: 12
[7, 12]
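Because the placeholders above are defined without a fixed shape, the same graph also accepts array inputs. A minimal sketch reusing the add and mul ops from this example:
with tf.Session() as sess:
    # feeding lists instead of scalars; the ops apply elementwise
    print(sess.run(add, feed_dict={a: [3, 5], b: [4, 6]}))  # [ 7 11]
    print(sess.run(mul, feed_dict={a: [3, 5], b: [4, 6]}))  # [12 30]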
Example 9: Saving and loading a regression model
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plotdata = {"batchsize": [], "loss": []}
def moving_average(a, w=10):
    if len(a) < w:
        return a[:]
    return [val if idx < w else sum(a[(idx-w):idx])/w for idx, val in enumerate(a)]
# generate synthetic data
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.3  # y = 2x plus noise
# plot the raw data
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.legend()
plt.show()
tf.reset_default_graph()
# build the model
# placeholders
X = tf.placeholder("float")
Y = tf.placeholder("float")
# model parameters
W = tf.Variable(tf.random_normal([1]), name="weight")
b = tf.Variable(tf.zeros([1]), name="bias")
# forward pass
z = tf.multiply(X, W) + b
# backward pass: loss and optimizer
cost = tf.reduce_mean(tf.square(Y - z))
learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)  # gradient descent
# initialize variables
init = tf.global_variables_initializer()
# training parameters
training_epochs = 20
display_step = 2
saver = tf.train.Saver()  # create the saver
savedir = "log/"
# launch the session
with tf.Session() as sess:
    sess.run(init)
    # fit all training data
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: x, Y: y})
        # print training progress
        if epoch % display_step == 0:
            loss = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            print("Epoch:", epoch+1, "cost=", loss, "W=", sess.run(W), "b=", sess.run(b))
            if not (loss == "NA"):
                plotdata["batchsize"].append(epoch)
                plotdata["loss"].append(loss)
    print(" Finished!")
    saver.save(sess, savedir + "linermodel.cpkt")
    print("cost=", sess.run(cost, feed_dict={X: train_X, Y: train_Y}), "W=", sess.run(W), "b=", sess.run(b))
    #print("cost:", cost.eval({X: train_X, Y: train_Y}))
    # plot the fitted line
    plt.plot(train_X, train_Y, 'ro', label='Original data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
    plotdata["avgloss"] = moving_average(plotdata["loss"])
    plt.figure(1)
    plt.subplot(211)
    plt.plot(plotdata["batchsize"], plotdata["avgloss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss')
    plt.show()
# restart a fresh session and load the model back
with tf.Session() as sess2:
    sess2.run(tf.global_variables_initializer())
    saver.restore(sess2, savedir + "linermodel.cpkt")
    print("x=0.2, z=", sess2.run(z, feed_dict={X: 0.2}))
Example 10: Inspecting model contents and other ways to save a model
# -*- coding: utf-8 -*-
import tensorflow as tf
from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file
savedir = "log/"
print_tensors_in_checkpoint_file(savedir+"linermodel.cpkt", None, True)
W = tf.Variable(1.0, name="weight")
b = tf.Variable(2.0, name="bias")
# map checkpoint names to variables with a dictionary
# (note the deliberate swap: b is stored under 'weight', W under 'bias')
saver = tf.train.Saver({'weight': b, 'bias': W})
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    saver.save(sess, savedir + "linermodel.cpkt")
print_tensors_in_checkpoint_file(savedir + "linermodel.cpkt", None, True)
tensor_name: bias [-0.02965496]
tensor_name: weight [2.000632]

tensor_name: bias 1.0
tensor_name: weight 2.0
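The dictionary keys are the names stored in the checkpoint file and the values are the in-graph variables, which is why the second listing shows bias 1.0 and weight 2.0 swapped. The same mechanism can save just a subset of variables; a minimal sketch using the W defined above (the filename is hypothetical):
# save only W, stored under the name 'weight'; b is omitted entirely
partial_saver = tf.train.Saver({'weight': W})
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    partial_saver.save(sess, savedir + "partialmodel.cpkt")  # hypothetical filename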
Example 11: Saving checkpoints during training
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plotdata = {"batchsize": [], "loss": []}
def moving_average(a, w=10):
    if len(a) < w:
        return a[:]
    return [val if idx < w else sum(a[(idx-w):idx])/w for idx, val in enumerate(a)]
# generate synthetic data
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.3  # y = 2x plus noise
# plot the raw data
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.legend()
plt.show()
tf.reset_default_graph()
# build the model
# placeholders
X = tf.placeholder("float")
Y = tf.placeholder("float")
# model parameters
W = tf.Variable(tf.random_normal([1]), name="weight")
b = tf.Variable(tf.zeros([1]), name="bias")
# forward pass
z = tf.multiply(X, W) + b
# backward pass: loss and optimizer
cost = tf.reduce_mean(tf.square(Y - z))
learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)  # gradient descent
# initialize variables
init = tf.global_variables_initializer()
# training parameters
training_epochs = 20
display_step = 2
saver = tf.train.Saver(max_to_keep=1)  # keep only the most recent checkpoint
savedir = "log/"
# launch the session
with tf.Session() as sess:
    sess.run(init)
    # fit all training data
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: x, Y: y})
        # print training progress
        if epoch % display_step == 0:
            loss = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            print("Epoch:", epoch+1, "cost=", loss, "W=", sess.run(W), "b=", sess.run(b))
            if not (loss == "NA"):
                plotdata["batchsize"].append(epoch)
                plotdata["loss"].append(loss)
            # save a checkpoint on each display step, tagged with the epoch number
            saver.save(sess, savedir + "linermodel.cpkt", global_step=epoch)
    print(" Finished!")
    print("cost=", sess.run(cost, feed_dict={X: train_X, Y: train_Y}), "W=", sess.run(W), "b=", sess.run(b))
    #print("cost:", cost.eval({X: train_X, Y: train_Y}))
    # plot the fitted line
    plt.plot(train_X, train_Y, 'ro', label='Original data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
    plotdata["avgloss"] = moving_average(plotdata["loss"])
    plt.figure(1)
    plt.subplot(211)
    plt.plot(plotdata["batchsize"], plotdata["avgloss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss')
    plt.show()
# restart a session and load a checkpoint by epoch number
load_epoch = 18
with tf.Session() as sess2:
    sess2.run(tf.global_variables_initializer())
    saver.restore(sess2, savedir + "linermodel.cpkt-" + str(load_epoch))
    print("x=0.2, z=", sess2.run(z, feed_dict={X: 0.2}))
# or load via the checkpoint state file
with tf.Session() as sess3:
    sess3.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(savedir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess3, ckpt.model_checkpoint_path)
        print("x=0.2, z=", sess3.run(z, feed_dict={X: 0.2}))
# or load the latest checkpoint directly
with tf.Session() as sess4:
    sess4.run(tf.global_variables_initializer())
    kpt = tf.train.latest_checkpoint(savedir)
    if kpt != None:
        saver.restore(sess4, kpt)
        print("x=0.2, z=", sess4.run(z, feed_dict={X: 0.2}))
Example 12: A simpler way to save checkpoints
# -*- coding: utf-8 -*-
import tensorflow as tf
tf.reset_default_graph()
global_step = tf.train.get_or_create_global_step()
step = tf.assign_add(global_step, 1)
# MonitoredTrainingSession restores from checkpoint_dir if a checkpoint exists
# and writes a new checkpoint there every 2 seconds
with tf.train.MonitoredTrainingSession(checkpoint_dir='log/checkpoints', save_checkpoint_secs=2) as sess:
    print(sess.run([global_step]))
    while not sess.should_stop():
        i = sess.run(step)
        print(i)
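As written, the loop runs until the process is killed, because nothing ever triggers should_stop(). A stop hook bounds the run; a minimal sketch with the same step op, assuming a limit of 100 steps:
with tf.train.MonitoredTrainingSession(
        checkpoint_dir='log/checkpoints',
        hooks=[tf.train.StopAtStepHook(last_step=100)],  # ends the session at global step 100
        save_checkpoint_secs=2) as sess:
    while not sess.should_stop():
        print(sess.run(step))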
Example 13: Visualizing the linear regression with TensorBoard
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plotdata = {"batchsize": [], "loss": []}
def moving_average(a, w=10):
    if len(a) < w:
        return a[:]
    return [val if idx < w else sum(a[(idx-w):idx])/w for idx, val in enumerate(a)]
# generate synthetic data
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.3  # y = 2x plus noise
# plot the raw data
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.legend()
plt.show()
tf.reset_default_graph()
# build the model
# placeholders
X = tf.placeholder("float")
Y = tf.placeholder("float")
# model parameters
W = tf.Variable(tf.random_normal([1]), name="weight")
b = tf.Variable(tf.zeros([1]), name="bias")
# forward pass
z = tf.multiply(X, W) + b
tf.summary.histogram('z', z)  # log the predictions as a histogram
# backward pass: loss and optimizer
cost = tf.reduce_mean(tf.square(Y - z))
tf.summary.scalar('loss_function', cost)  # log the loss as a scalar
learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)  # gradient descent
# initialize variables
init = tf.global_variables_initializer()
# training parameters
training_epochs = 20
display_step = 2
# launch the session
with tf.Session() as sess:
    sess.run(init)
    merged_summary_op = tf.summary.merge_all()  # merge all summaries
    # create a summary_writer for writing the event files
    summary_writer = tf.summary.FileWriter('log/mnist_with_summaries', sess.graph)
    # fit all training data
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: x, Y: y})
            # generate the summaries
            summary_str = sess.run(merged_summary_op, feed_dict={X: x, Y: y})
            summary_writer.add_summary(summary_str, epoch)  # write the summaries to file
        # print training progress
        if epoch % display_step == 0:
            loss = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            print("Epoch:", epoch+1, "cost=", loss, "W=", sess.run(W), "b=", sess.run(b))
            if not (loss == "NA"):
                plotdata["batchsize"].append(epoch)
                plotdata["loss"].append(loss)
    print(" Finished!")
    print("cost=", sess.run(cost, feed_dict={X: train_X, Y: train_Y}), "W=", sess.run(W), "b=", sess.run(b))
    #print("cost:", cost.eval({X: train_X, Y: train_Y}))
    # plot the fitted line
    plt.plot(train_X, train_Y, 'ro', label='Original data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
    plotdata["avgloss"] = moving_average(plotdata["loss"])
    plt.figure(1)
    plt.subplot(211)
    plt.plot(plotdata["batchsize"], plotdata["avgloss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss')
    plt.show()
    print("x=0.2, z=", sess.run(z, feed_dict={X: 0.2}))
To view the summaries, launch TensorBoard against the summary directory and open http://localhost:6006 in a browser:
tensorboard --logdir log/mnist_with_summaries
Example 14: The difference between get_variable and Variable
# -*- coding: utf-8 -*-
import tensorflow as tf
tf.reset_default_graph()
var1 = tf.Variable(1.0, name='firstvar')
print("var1:", var1.name)
var1 = tf.Variable(2.0, name='firstvar')  # same name: TensorFlow uniquifies it
print("var1:", var1.name)
var2 = tf.Variable(3.0)
print("var2:", var2.name)
var2 = tf.Variable(4.0)
print("var2:", var2.name)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("var1=", var1.eval())
    print("var2=", var2.eval())
get_var1 = tf.get_variable("firstvar", [1], initializer=tf.constant_initializer(0.3))
print("get_var1:", get_var1.name)
#get_var1 = tf.get_variable("firstvar", [1], initializer=tf.constant_initializer(0.4))
#print("get_var1:", get_var1.name)
get_var1 = tf.get_variable("firstvar1", [1], initializer=tf.constant_initializer(0.4))
print("get_var1:", get_var1.name)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("get_var1=", get_var1.eval())
var1: firstvar:0
var1: firstvar_1:0
var2: Variable:0
var2: Variable_1:0
var1= 2.0
var2= 4.0
get_var1: firstvar_2:0
get_var1: firstvar1:0
get_var1= [0.4]
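The commented-out lines above point at the key difference: tf.Variable quietly uniquifies a repeated name, while tf.get_variable refuses one. A minimal sketch of the failure, run after the code above:
try:
    dup = tf.get_variable("firstvar", [1])  # "firstvar" already exists in the graph
except ValueError as e:
    print("ValueError:", e)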
Example 15: Getting variables under a specific scope
# -*- coding: utf-8 -*-
import tensorflow as tf
tf.reset_default_graph()
#var1 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
#var2 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
with tf.variable_scope("test1"):
    var1 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
with tf.variable_scope("test2"):
    var2 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
print("var1:", var1.name)
print("var2:", var2.name)
var1: test1/firstvar:0
var2: test2/firstvar:0
Example 16: Variable sharing
Now rebuild the same scopes a second time on top of the code above:
# -*- coding: utf-8 -*-
import tensorflow as tf
tf.reset_default_graph()
#var1 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
#var2 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
with tf.variable_scope("test1", reuse=tf.AUTO_REUSE):
    var1 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
    with tf.variable_scope("test2"):
        var2 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
print("var1:", var1.name)
print("var2:", var2.name)
with tf.variable_scope("test1", reuse=tf.AUTO_REUSE):
    var3 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
    with tf.variable_scope("test2"):
        var4 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
print("var3:", var3.name)
print("var4:", var4.name)
var1: test1/firstvar:0
var2: test1/test2/firstvar:0
var3: test1/firstvar:0
var4: test1/test2/firstvar:0
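tf.AUTO_REUSE creates a variable on the first use and reuses it afterwards. Plain reuse=True is stricter: the variable must already exist. A minimal sketch on top of the scopes built above:
with tf.variable_scope("test1", reuse=True):
    var5 = tf.get_variable("firstvar", shape=[2], dtype=tf.float32)
print("var5:", var5.name)  # test1/firstvar:0, the same variable as var1 and var3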
Example 17: Initializing shared variables through scope initializers
# -*- coding: utf-8 -*-
import tensorflow as tf
with tf.variable_scope("test1", initializer=tf.constant_initializer(0.4) ):
var1 = tf.get_variable("firstvar",shape=[2],dtype=tf.float32)
with tf.variable_scope("test2"):
var2 = tf.get_variable("firstvar",shape=[2],dtype=tf.float32)
var3 = tf.get_variable("var3",shape=[2],initializer=tf.constant_initializer(0.3))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
print("var1=",var1.eval())
print("var2=",var2.eval())
print("var3=",var3.eval())
var1= [0.4 0.4]
var2= [0.4 0.4]
var3= [0.3 0.3]
Example 18: How scopes restrict op names
As the output shows, tf.name_scope restricts only op names; it does not restrict the names of variables created with tf.get_variable.
# -*- coding: utf-8 -*-
import tensorflow as tf
tf.reset_default_graph()
with tf.variable_scope("scope1") as sp:
    var1 = tf.get_variable("v", [1])
print("sp:", sp.name)
print("var1:", var1.name)
with tf.variable_scope("scope2"):
    var2 = tf.get_variable("v", [1])
    with tf.variable_scope(sp) as sp1:  # re-entering scope1 ignores the enclosing scope2
        var3 = tf.get_variable("v3", [1])
        with tf.variable_scope(""):     # an empty scope name adds an empty path segment
            var4 = tf.get_variable("v4", [1])
print("sp1:", sp1.name)
print("var2:", var2.name)
print("var3:", var3.name)
print("var4:", var4.name)
with tf.variable_scope("scope"):
    with tf.name_scope("bar"):
        v = tf.get_variable("v", [1])   # unaffected by the name_scope
        x = 1.0 + v                     # the add op is placed under scope/bar
        with tf.name_scope(""):         # an empty name_scope returns to the top level
            y = 1.0 + v
print("v:", v.name)
print("x.op:", x.op.name)
print("y.op:", y.op.name)
sp: scope1
var1: scope1/v:0
sp1: scope1
var2: scope2/v:0
var3: scope1/v3:0
var4: scope1//v4:0
v: scope/v:0
x.op: scope/bar/add
y.op: add
Example 19: Basic graph operations
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
# 1. ways to create a graph
c = tf.constant(0.0)
g = tf.Graph()
with g.as_default():
    c1 = tf.constant(0.0)
print(c1.graph)
print(g)
print(c.graph)
g2 = tf.get_default_graph()
print(g2)
tf.reset_default_graph()
g3 = tf.get_default_graph()
print(g3)
# 2. getting a tensor by name
print(c1.name)
t = g.get_tensor_by_name(name="Const:0")
print(t)
# 3. getting an op by name
a = tf.constant([[1.0, 2.0]])
b = tf.constant([[1.0], [3.0]])
tensor1 = tf.matmul(a, b, name='exampleop')
print(tensor1.name, tensor1)
test = g3.get_tensor_by_name("exampleop:0")
print(test)
print(tensor1.op.name)
testop = g3.get_operation_by_name("exampleop")
print(testop)
with tf.Session() as sess:
    test = sess.run(test)
    print(test)
    test = tf.get_default_graph().get_tensor_by_name("exampleop:0")
    print(test)
# 4. listing all op nodes in the graph
tt2 = g.get_operations()
print(tt2)
# 5. looking up a graph element directly
tt3 = g.as_graph_element(c1)
print(tt3)
print("________________________\n")
# exercise:
#with g.as_default():
#    c1 = tf.constant(0.0)
#    print(c1.graph)
#    print(g)
#    print(c.graph)
#    g3 = tf.get_default_graph()
#    print(g3)
<tensorflow.python.framework.ops.Graph object at 0x000002074662E748>
<tensorflow.python.framework.ops.Graph object at 0x000002074662E748>
<tensorflow.python.framework.ops.Graph object at 0x00000206A2AF9E48>
<tensorflow.python.framework.ops.Graph object at 0x00000206A2AF9E48>
<tensorflow.python.framework.ops.Graph object at 0x000002074662E860>
Example 20: Distributed deployment
1. Assign an IP address and port to each role, and create the server
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plotdata = {"batchsize": [], "loss": []}
def moving_average(a, w=10):
    if len(a) < w:
        return a[:]
    return [val if idx < w else sum(a[(idx-w):idx])/w for idx, val in enumerate(a)]
# generate synthetic data
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.3  # y = 2x plus noise
# plot the raw data
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.legend()
plt.show()
tf.reset_default_graph()
# define IPs and ports
strps_hosts = "localhost:1681"
strworker_hosts = "localhost:1682,localhost:1683"
# define this task's role
strjob_name = "ps"
task_index = 0
ps_hosts = strps_hosts.split(',')
worker_hosts = strworker_hosts.split(',')
cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts, 'worker': worker_hosts})
# create the server
server = tf.train.Server(
    {'ps': ps_hosts, 'worker': worker_hosts},
    job_name=strjob_name,
    task_index=task_index)
2. Add a wait for the ps role
# the ps role blocks in join() and only serves parameters
if strjob_name == 'ps':
    print("wait")
    server.join()
3. Build the network
with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % task_index,
        cluster=cluster_spec)):
    X = tf.placeholder("float")
    Y = tf.placeholder("float")
    # model parameters
    W = tf.Variable(tf.random_normal([1]), name="weight")
    b = tf.Variable(tf.zeros([1]), name="bias")
    global_step = tf.train.get_or_create_global_step()  # iteration counter
    # forward pass
    z = tf.multiply(X, W) + b
    tf.summary.histogram('z', z)  # log the predictions as a histogram
    # backward pass: loss and optimizer
    cost = tf.reduce_mean(tf.square(Y - z))
    tf.summary.scalar('loss_function', cost)  # log the loss as a scalar
    learning_rate = 0.01
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, global_step=global_step)  # gradient descent
    saver = tf.train.Saver(max_to_keep=1)
    merged_summary_op = tf.summary.merge_all()  # merge all summaries
    init = tf.global_variables_initializer()
4. Create a Supervisor to manage the session
# training parameters
training_epochs = 2200
display_step = 2
sv = tf.train.Supervisor(is_chief=(task_index == 0),
                         logdir="log/super/",
                         init_op=init,
                         summary_op=None,
                         saver=saver,
                         global_step=global_step,
                         save_model_secs=5)
# connect to the target role and create the session
with sv.managed_session(server.target) as sess:
    #sess.run(init)  # not needed; the Supervisor initializes the variables
5. Train iteratively
    print("sess ok")
    print(global_step.eval(session=sess))
    for epoch in range(global_step.eval(session=sess), training_epochs * len(train_X)):
        for (x, y) in zip(train_X, train_Y):
            _, epoch = sess.run([optimizer, global_step], feed_dict={X: x, Y: y})
            # generate the summaries
            summary_str = sess.run(merged_summary_op, feed_dict={X: x, Y: y})
            # write the summaries to file
            sv.summary_computed(sess, summary_str, global_step=epoch)
            if epoch % display_step == 0:
                loss = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
                print("Epoch:", epoch+1, "cost=", loss, "W=", sess.run(W), "b=", sess.run(b))
                if not (loss == "NA"):
                    plotdata["batchsize"].append(epoch)
                    plotdata["loss"].append(loss)
    #sv.saver.save(sess, "log/mnist_with_summaries/", global_step=epoch)
    print(" Finished!")
    sv.saver.save(sess, "log/mnist_with_summaries/" + "sv.cpk", global_step=epoch)
sv.stop()
6. Create the worker files
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plotdata = {"batchsize": [], "loss": []}
def moving_average(a, w=10):
    if len(a) < w:
        return a[:]
    return [val if idx < w else sum(a[(idx-w):idx])/w for idx, val in enumerate(a)]
# generate synthetic data
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.3  # y = 2x plus noise
# plot the raw data
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.legend()
plt.show()
tf.reset_default_graph()
strps_hosts = "localhost:1681"
strworker_hosts = "localhost:1682,localhost:1683"
# this task is worker 0, the chief
strjob_name = "worker"
task_index = 0
ps_hosts = strps_hosts.split(',')
worker_hosts = strworker_hosts.split(',')
cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts, 'worker': worker_hosts})
server = tf.train.Server(
    {'ps': ps_hosts, 'worker': worker_hosts},
    job_name=strjob_name,
    task_index=task_index)
if strjob_name == 'ps':
    print("wait")
    server.join()
with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % task_index,
        cluster=cluster_spec)):
    X = tf.placeholder("float")
    Y = tf.placeholder("float")
    # model parameters
    W = tf.Variable(tf.random_normal([1]), name="weight")
    b = tf.Variable(tf.zeros([1]), name="bias")
    global_step = tf.contrib.framework.get_or_create_global_step()  # iteration counter
    # forward pass
    z = tf.multiply(X, W) + b
    tf.summary.histogram('z', z)  # log the predictions as a histogram
    # backward pass: loss and optimizer
    cost = tf.reduce_mean(tf.square(Y - z))
    tf.summary.scalar('loss_function', cost)  # log the loss as a scalar
    learning_rate = 0.01
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, global_step=global_step)  # gradient descent
    saver = tf.train.Saver(max_to_keep=1)
    merged_summary_op = tf.summary.merge_all()  # merge all summaries
    init = tf.global_variables_initializer()
# training parameters
training_epochs = 2200
display_step = 2
sv = tf.train.Supervisor(is_chief=(task_index == 0),
                         logdir="log/super/",
                         init_op=init,
                         summary_op=None,
                         saver=saver,
                         global_step=global_step,
                         save_model_secs=5)
with sv.managed_session(server.target) as sess:
    #sess.run(init)  # not needed; the Supervisor initializes the variables
    print("sess ok")
    print(global_step.eval(session=sess))
    for epoch in range(global_step.eval(session=sess), training_epochs * len(train_X)):
        for (x, y) in zip(train_X, train_Y):
            _, epoch = sess.run([optimizer, global_step], feed_dict={X: x, Y: y})
            # generate the summaries
            summary_str = sess.run(merged_summary_op, feed_dict={X: x, Y: y})
            # write the summaries to file
            sv.summary_computed(sess, summary_str, global_step=epoch)
            if epoch % display_step == 0:
                loss = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
                print("Epoch:", epoch+1, "cost=", loss, "W=", sess.run(W), "b=", sess.run(b))
                if not (loss == "NA"):
                    plotdata["batchsize"].append(epoch)
                    plotdata["loss"].append(loss)
    #sv.saver.save(sess, "log/mnist_with_summaries/", global_step=epoch)
    print(" Finished!")
    sv.saver.save(sess, "log/mnist_with_summaries/" + "sv.cpk", global_step=epoch)
sv.stop()
The second worker file is identical except that task_index is set to 1 and the summary_computed call is commented out, so only the chief worker writes summaries:
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plotdata = {"batchsize": [], "loss": []}
def moving_average(a, w=10):
    if len(a) < w:
        return a[:]
    return [val if idx < w else sum(a[(idx-w):idx])/w for idx, val in enumerate(a)]
# generate synthetic data
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.3  # y = 2x plus noise
# plot the raw data
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.legend()
plt.show()
tf.reset_default_graph()
strps_hosts = "localhost:1681"
strworker_hosts = "localhost:1682,localhost:1683"
# this task is worker 1
strjob_name = "worker"
task_index = 1
ps_hosts = strps_hosts.split(',')
worker_hosts = strworker_hosts.split(',')
cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts, 'worker': worker_hosts})
server = tf.train.Server(
    {'ps': ps_hosts, 'worker': worker_hosts},
    job_name=strjob_name,
    task_index=task_index)
if strjob_name == 'ps':
    print("wait")
    server.join()
with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % task_index,
        cluster=cluster_spec)):
    X = tf.placeholder("float")
    Y = tf.placeholder("float")
    # model parameters
    W = tf.Variable(tf.random_normal([1]), name="weight")
    b = tf.Variable(tf.zeros([1]), name="bias")
    global_step = tf.contrib.framework.get_or_create_global_step()  # iteration counter
    # forward pass
    z = tf.multiply(X, W) + b
    tf.summary.histogram('z', z)  # log the predictions as a histogram
    # backward pass: loss and optimizer
    cost = tf.reduce_mean(tf.square(Y - z))
    tf.summary.scalar('loss_function', cost)  # log the loss as a scalar
    learning_rate = 0.01
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, global_step=global_step)  # gradient descent
    saver = tf.train.Saver(max_to_keep=1)
    merged_summary_op = tf.summary.merge_all()  # merge all summaries
    init = tf.global_variables_initializer()
# training parameters
training_epochs = 2200
display_step = 2
sv = tf.train.Supervisor(is_chief=(task_index == 0),
                         logdir="log/super/",
                         init_op=init,
                         summary_op=None,
                         saver=saver,
                         global_step=global_step,
                         save_model_secs=5)
with sv.managed_session(server.target) as sess:
    #sess.run(init)  # not needed; the Supervisor initializes the variables
    print("sess ok")
    print(global_step.eval(session=sess))
    for epoch in range(global_step.eval(session=sess), training_epochs * len(train_X)):
        for (x, y) in zip(train_X, train_Y):
            _, epoch = sess.run([optimizer, global_step], feed_dict={X: x, Y: y})
            # generate the summaries (not written here; only the chief writes them)
            summary_str = sess.run(merged_summary_op, feed_dict={X: x, Y: y})
            #sv.summary_computed(sess, summary_str, global_step=epoch)
            if epoch % display_step == 0:
                loss = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
                print("Epoch:", epoch+1, "cost=", loss, "W=", sess.run(W), "b=", sess.run(b))
                if not (loss == "NA"):
                    plotdata["batchsize"].append(epoch)
                    plotdata["loss"].append(loss)
    #sv.saver.save(sess, "log/mnist_with_summaries/", global_step=epoch)
    print(" Finished!")
    sv.saver.save(sess, "log/mnist_with_summaries/" + "sv.cpk", global_step=epoch)
sv.stop()
Complete code for the ps script
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plotdata = {"batchsize": [], "loss": []}
def moving_average(a, w=10):
    if len(a) < w:
        return a[:]
    return [val if idx < w else sum(a[(idx-w):idx])/w for idx, val in enumerate(a)]
# generate synthetic data
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.3  # y = 2x plus noise
# plot the raw data
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.legend()
plt.show()
tf.reset_default_graph()
# define IPs and ports
strps_hosts = "localhost:1681"
strworker_hosts = "localhost:1682,localhost:1683"
# define this task's role
strjob_name = "ps"
task_index = 0
ps_hosts = strps_hosts.split(',')
worker_hosts = strworker_hosts.split(',')
cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts, 'worker': worker_hosts})
# create the server
server = tf.train.Server(
    {'ps': ps_hosts, 'worker': worker_hosts},
    job_name=strjob_name,
    task_index=task_index)
# the ps role blocks in join() and only serves parameters
if strjob_name == 'ps':
    print("wait")
    server.join()
with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % task_index,
        cluster=cluster_spec)):
    X = tf.placeholder("float")
    Y = tf.placeholder("float")
    # model parameters
    W = tf.Variable(tf.random_normal([1]), name="weight")
    b = tf.Variable(tf.zeros([1]), name="bias")
    global_step = tf.train.get_or_create_global_step()  # iteration counter
    # forward pass
    z = tf.multiply(X, W) + b
    tf.summary.histogram('z', z)  # log the predictions as a histogram
    # backward pass: loss and optimizer
    cost = tf.reduce_mean(tf.square(Y - z))
    tf.summary.scalar('loss_function', cost)  # log the loss as a scalar
    learning_rate = 0.01
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, global_step=global_step)  # gradient descent
    saver = tf.train.Saver(max_to_keep=1)
    merged_summary_op = tf.summary.merge_all()  # merge all summaries
    init = tf.global_variables_initializer()
# training parameters
training_epochs = 2200
display_step = 2
sv = tf.train.Supervisor(is_chief=(task_index == 0),
                         logdir="log/super/",
                         init_op=init,
                         summary_op=None,
                         saver=saver,
                         global_step=global_step,
                         save_model_secs=5)
# connect to the target role and create the session
with sv.managed_session(server.target) as sess:
    #sess.run(init)  # not needed; the Supervisor initializes the variables
    print("sess ok")
    print(global_step.eval(session=sess))
    for epoch in range(global_step.eval(session=sess), training_epochs * len(train_X)):
        for (x, y) in zip(train_X, train_Y):
            _, epoch = sess.run([optimizer, global_step], feed_dict={X: x, Y: y})
            # generate the summaries
            summary_str = sess.run(merged_summary_op, feed_dict={X: x, Y: y})
            # write the summaries to file
            sv.summary_computed(sess, summary_str, global_step=epoch)
            if epoch % display_step == 0:
                loss = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
                print("Epoch:", epoch+1, "cost=", loss, "W=", sess.run(W), "b=", sess.run(b))
                if not (loss == "NA"):
                    plotdata["batchsize"].append(epoch)
                    plotdata["loss"].append(loss)
    #sv.saver.save(sess, "log/mnist_with_summaries/", global_step=epoch)
    print(" Finished!")
    sv.saver.save(sess, "log/mnist_with_summaries/" + "sv.cpk", global_step=epoch)
sv.stop()
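To bring the cluster up, run the ps script and the two worker scripts in three separate terminals (or machines). The file names below are placeholders for wherever the three scripts were saved:
python ps.py       # parameter server on localhost:1681
python worker0.py  # chief worker on localhost:1682
python worker1.py  # second worker on localhost:1683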