Edit: I found what I think is a working solution: https://bleyddyn.github.io/posts/2017/10/keras-lstm/ (Keras LSTM: Predict 1 Timestep at a Time).
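The gist, as far as I can tell: train a stateless model on full sequences, then run a separate stateful copy one timestep at a time after copying the weights across. A minimal sketch of that pattern follows; the function name, layer sizes, and shapes are illustrative, not taken from the post.

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Convolution2D
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed

def make_pair(num_actions, timesteps, input_dim):
    def build(stateful):
        # The stateless model trains on whole sequences; the stateful one
        # takes (1 sample, 1 timestep) and keeps its LSTM state across calls.
        if stateful:
            kwargs = dict(batch_input_shape=(1, 1) + input_dim)
        else:
            kwargs = dict(input_shape=(timesteps,) + input_dim)
        model = Sequential()
        model.add(TimeDistributed(
            Convolution2D(8, (3, 3), strides=(2, 2), activation='relu'),
            **kwargs))
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(512, return_sequences=True, stateful=stateful))
        model.add(Dense(num_actions, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='adam')
        return model
    return build(False), build(True)

train_model, robot_model = make_pair(6, 10, (84, 84, 3))
# ... train train_model on (batch, timesteps, 84, 84, 3) sequences ...
robot_model.set_weights(train_model.get_weights())  # layer shapes are identical
pred = robot_model.predict(np.zeros((1, 1, 84, 84, 3)), batch_size=1)
robot_model.reset_states()  # at the start of each new episode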
I'm trying to use a conv/LSTM network to control a robot. I think I have everything set up so I could start training on batches of data from a replay memory, but I can't figure out how to actually use it to control the robot. Simplified test code is below.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Input
from keras.layers import Convolution2D
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed
from keras.utils import to_categorical
def make_model(num_actions, timesteps, input_dim, l2_reg=0.005):
    input_shape = (timesteps,) + input_dim
    model = Sequential()
    model.add(TimeDistributed(Convolution2D(8, (3, 3), strides=(2, 2), activation='relu'), input_shape=input_shape))
    model.add(TimeDistributed(Convolution2D(16, (3, 3), strides=(2, 2), activation='relu')))
    model.add(TimeDistributed(Convolution2D(32, (3, 3), strides=(2, 2), activation='relu')))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(512, return_sequences=True, activation='relu', unroll=True))
    model.add(Dense(num_actions, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model
batch_size = 16
timesteps = 10
num_actions = 6
model = make_model(num_actions, timesteps, (84,84,3))
model.summary()
# Fake training batch. Would be pulled from a replay memory
batch = np.random.uniform(low=0, high=255, size=(batch_size,timesteps,84,84,3))
y = np.random.randint(0, high=5, size=(batch_size * timesteps,))  # 160 fake action labels
y = to_categorical(y, num_classes=num_actions)
y = y.reshape(batch_size, timesteps, num_actions)
# stateful should be false here
pred = model.train_on_batch(batch, y)
# move trained network to robot
# This works, but waiting 10 timesteps before getting any outputs (actions) isn't
# practical, and I don't think the LSTM internal state would be correct if I tried
# a rolling queue of input images (see the rolling-window sketch after this listing).
batch = np.random.uniform(low=0, high=255, size=(1,timesteps,84,84,3))
pred = model.predict(batch, batch_size=1)
# This is what I would need to do on my robot, with the LSTM keeping state between calls to predict
max_time = 10 # or 100000, or forever, etc.
for i in range(max_time):
    image = np.random.uniform(low=0, high=255, size=(1, 1, 84, 84, 3))  # pull one image from camera
    # stateful should be true here
    pred = model.predict(image, batch_size=1)
    # take action based on pred
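For reference, the rolling-queue idea would look like the sketch below. Since the stateless model recomputes the LSTM state from scratch on every predict() call, the state over the window is at least consistent; it's just redundant work compared to a truly stateful model. Illustrative and untested:

from collections import deque

window = deque(maxlen=timesteps)
for i in range(max_time):
    frame = np.random.uniform(low=0, high=255, size=(84, 84, 3))  # camera frame
    window.append(frame)
    while len(window) < timesteps:
        window.append(frame)  # pad with the first frame until the window is full
    seq = np.expand_dims(np.stack(window), axis=0)  # (1, timesteps, 84, 84, 3)
    pred = model.predict(seq, batch_size=1)
    action = pred[0, -1].argmax()  # act on the newest timestep only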
The error I get on the model.predict(image...) line above is:
ValueError: Error when checking : expected time_distributed_1_input to have shape (None, 10, 84, 84, 3) but got array with shape (1, 1, 84, 84, 3)
Which is understandable, but I can't find a way around it. I don't know Keras well enough to tell whether I'm even using the TimeDistributed layers correctly.
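For what it's worth, a quick shape check makes me think the TimeDistributed wrapping itself is fine; this standalone snippet (illustrative) shows the wrapped convolution being applied to every timestep independently:

from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers.wrappers import TimeDistributed

m = Sequential()
m.add(TimeDistributed(Convolution2D(8, (3, 3), strides=(2, 2)),
                      input_shape=(10, 84, 84, 3)))
print(m.output_shape)  # (None, 10, 41, 41, 8): the conv runs on each frame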
So, is this even possible in Keras? If so, how?
If not, is it possible in TensorFlow or PyTorch?
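(For context on the PyTorch part: my understanding is that PyTorch's LSTM takes the hidden state as an explicit input and returns the updated state, so carrying it across single-timestep calls would just mean feeding it back in. Illustrative sketch, not tested:)

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=32, hidden_size=512)  # sizes are placeholders
state = None  # None starts from a zero state
for _ in range(10):
    feats = torch.randn(1, 1, 32)    # (seq_len=1, batch=1, features)
    out, state = lstm(feats, state)  # pass the state back in on the next call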
Thanks for any suggestions!
Edit: Adding code that runs, although it's not necessarily correct. It still needs to be tested on an OpenAI Gym task.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Input
from keras.layers import Convolution2D
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed
from keras.utils import to_categorical
def make_model(num_actions, timesteps, input_dim, l2_reg=0.005):
    # Batch size fixed to 1, variable sequence length, for the stateful LSTM
    input_shape = (1, None) + input_dim
    model = Sequential()
    model.add(TimeDistributed(Convolution2D(8, (3, 3), strides=(2, 2), activation='relu'), batch_input_shape=input_shape))
    model.add(TimeDistributed(Convolution2D(16, (3, 3), strides=(2, 2), activation='relu')))
    model.add(TimeDistributed(Convolution2D(32, (3, 3), strides=(2, 2), activation='relu')))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(512, return_sequences=True, activation='relu', stateful=True))
    model.add(Dense(num_actions, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model
batch_size = 16
timesteps = 10
num_actions = 6
model = make_model(num_actions, 1, (84,84,3))
model.summary()
# Fake training batch. Would be pulled from a replay memory
batch = np.random.uniform(low=0, high=255, size=(batch_size,timesteps,84,84,3))
y = np.random.randint(0, high=5, size=(batch_size * timesteps,))  # 160 fake action labels
y = to_categorical(y, num_classes=num_actions)
y = y.reshape(batch_size, timesteps, num_actions)
# Need to find a way to keep the optimizer from updating on every b and instead
# accumulate updates over the entire batch (batch_size); one option is sketched
# after this listing.
for b in range(batch_size):
    pred = model.train_on_batch(np.reshape(batch[b, :], (1, timesteps, 84, 84, 3)),
                                np.reshape(y[b, :], (1, timesteps, num_actions)))
    # for t in range(timesteps):
    #     pred = model.train_on_batch(np.reshape(batch[b, t, :], (1, 1, 84, 84, 3)),
    #                                 np.reshape(y[b, t, :], (1, 1, num_actions)))
    model.reset_states()  # Don't carry internal state between sequences
# move trained network to robot
# This works, but waiting 10 timesteps before getting any outputs (actions) isn't practical
#batch = np.random.uniform(low=0, high=255, size=(1,timesteps,84,84,3))
#pred = model.predict(batch, batch_size=1)
# This is what I would need to do on my robot, with the LSTM keeping state between calls to predict
max_time = 10 # or 100000, or forever, etc.
for i in range(max_time):
    image = np.random.uniform(low=0, high=255, size=(1, 1, 84, 84, 3))  # pull one image from camera
    # stateful is True here, so the LSTM keeps its state between predict calls
    pred = model.predict(image, batch_size=1)
    # take action based on pred
    print(pred)
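On the accumulate-updates problem flagged in the listing above: one option (untested, and it assumes the model is rebuilt with tf.keras so a GradientTape can reach its variables) is to sum the per-sequence gradients and apply them in a single optimizer step:

import tensorflow as tf

optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.CategoricalCrossentropy()

accum = [tf.zeros_like(v) for v in model.trainable_variables]
for b in range(batch_size):
    x_b = batch[b:b + 1]  # (1, timesteps, 84, 84, 3)
    y_b = y[b:b + 1]      # (1, timesteps, num_actions)
    with tf.GradientTape() as tape:
        loss = loss_fn(y_b, model(x_b, training=True))
    grads = tape.gradient(loss, model.trainable_variables)
    accum = [a + g for a, g in zip(accum, grads)]
    model.reset_states()  # don't carry LSTM state between sequences

optimizer.apply_gradients(
    zip([a / batch_size for a in accum], model.trainable_variables))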
Is the robot processing images? Is that what the 84 x 84 is supposed to be? –
Yes, 84x84x3 (width, height, color channels). – Bleyddyn