2017-08-24 65 views
0

我正在训练NN使用强化学习来播放2048。或者至少我认为我是,因为我是新手。ValueError:形状(9,)和(4,)未对齐

这是NeuralNetwork.py样子:

import random 
import numpy as np 

def nonlin(x, deriv=False): 
    if(deriv==True): 
     return x * (1-x) 
    return 1/(1+np.exp(-x)) 


np.random.seed(1) 


class NeuralNetwork: 

    next_ID = 0 

    def __init__(self, HyperParams): 
     self.synapses = [] 
     for synapse in range(len(HyperParams)-1): 
      self.synapses.append(2*np.random.random((HyperParams[synapse], HyperParams[synapse+1]))-1) 
     self.score = 0 
     # self.name = words[random.randint(0, len(words))].strip() 
     self.name = str(NeuralNetwork.next_ID) 
     NeuralNetwork.next_ID += 1 


    def train_batch(self, epoch, state, outcome): 
     for i in range(epoch): 
      self.layers = [] 
      self.layers.append(state) 
      for j in range(len(self.synapses)): 
       self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j]))) 

      error = outcome - self.layers[-1] 
      if (i % 1000) == 0: print(str(np.mean(np.abs(error)))) 

      for j in range(1,1+len(self.synapses)): 
       delta = error * nonlin(self.layers[-j], True) 
       error = delta.dot(self.synapses[-j].T) 
       self.synapses[-j] += self.layers[-(j+1)].T.dot(delta) 


    def train(self, state, outcome): 
     self.layers = [] 
     self.layers.append(state) 
     for j in range(len(self.synapses)): 
      self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j]))) 

     error = outcome - self.layers[-1] 
     print("error: ", error.shape) 
     for j in range(1,1+len(self.synapses)): 
      delta = error * nonlin(self.layers[-j], True) 
      print("delta: ", delta.shape) 
      error = delta.dot(self.synapses[-j].T) 
      print("layer: ", self.layers[-(j+1)].shape) 
      print("layer.T: ", self.layers[-(j+1)].T.shape) 

      # this is the issue 
      print("dot: ", self.layers[-(j+1)].T.dot(delta).shape) 
      self.synapses[-j] += self.layers[-(j+1)].T.dot(delta) 


    def next_gen(self): 
     child = NeuralNetwork([1]) 
     for synapse in self.synapses: 
      # add variation 
      child.synapses.append(synapse + 0.1*np.random.random(synapse.shape)-0.05) 
     # child.name += " son of " + self.name 
     child.name += "<-" + self.name 
     return child 

    def feed(self, state): 
     self.layers = [] 
     self.layers.append(state) 
     for j in range(len(self.synapses)): 
      self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j]))) 
     return self.layers[-1] 

这是2048.py样子:

import random 
import os 
import sys 
import math 
import numpy as np 
from NeuralNetwork import * 

# global vars, constants and setup 
board = {} 
row_size = 4 
random.seed(1) 
HP = (16,9,4) 

# set up game board 
for i in range(row_size): # row 
    for j in range(row_size): #column 
     board[(i,j)] = 0 


# display function 
def display(): 
    for i in range(row_size): 
     print('\t'.join([str(board[(i,j)]) for j in range(row_size)])) 
    print() 


# logic function 
def logic(move, NN): 
    """ 
    char move is the move, one of any in "asdw" 
    NN is a NeuralNetwork object 
    """ 
    # print("mov", move) 
    score = 0 
    if move == 's': 
     for j in range(row_size): # columns 
      row_pointer = row_size-1 
      for i in reversed(range(row_size-1)): # go up the rows 
       if board[(i, j)] != 0: 
        # if there is a non-empty square above, and this is a zero #check 
        if board[(row_pointer, j)] == 0: 
         board[(row_pointer, j)] = board[(i, j)] 
         board[(i, j)] = 0 
         # row_pointer -= 1 # This is the new block to focus on 

        # if there is a non-empty square above, and they are not equivalent 
        elif board[(i, j)] != board[(row_pointer, j)]: 
         # while this intuitively is not a swap, without it I would need to zero board[(i,j)] 
         # that zero would cause problems if row_pointer-1 == i 
         board[(row_pointer-1, j)], board[(i, j)] = board[(i, j)], board[(row_pointer-1, j)] 
         row_pointer -= 1 # This is the new block to focus on 

        # if there is a non-empty square above, and they are the same 
        elif board[(i, j)] == board[(row_pointer, j)]: 
         board[(row_pointer, j)] += board[(i, j)] 
         board[(i, j)] = 0 
         score += board[(row_pointer, j)] + math.log(board[(row_pointer, j)], 2) 
    elif move == 'w': 
     for j in range(row_size): # columns 
      row_pointer = 0 
      for i in range(1, row_size): # go down the rows 
       if board[(i, j)] != 0: 
        # if there is a non-empty square above, and this is a zero 
        if board[(row_pointer, j)] == 0: 
         board[(row_pointer, j)] = board[(i, j)] 
         board[(i, j)] = 0 

        # if there is a non-empty square above, and they are not equivalent 
        elif board[(i, j)] != board[(row_pointer, j)]: 
         board[(row_pointer+1, j)], board[(i, j)] = board[(i, j)], board[(row_pointer+1, j)] 
         row_pointer += 1 # This is the new block to focus on 

        # if there is a non-empty square above, and they are the same 
        elif board[(i, j)] == board[(row_pointer, j)]: 
         board[(row_pointer, j)] += board[(i, j)] 
         board[(i, j)] = 0 
         score += board[(row_pointer, j)] + math.log(board[(row_pointer, j)], 2) 
    elif move == 'a': 
     for i in range(row_size): # rows 
      column_pointer = 0 
      for j in range(1, row_size): # go right through the columns 
       if board[(i, j)] != 0: 
        # if there is a non-empty square above, and this is a zero 
        if board[(i, column_pointer)] == 0: 
         board[(i, column_pointer)] = board[(i, j)] 
         board[(i, j)] = 0 

        # if there is a non-empty square above, and they are not equivalent 
        elif board[(i, j)] != board[(i, column_pointer)]: 
         board[(i, column_pointer+1)], board[(i, j)] = board[(i, j)], board[(i, column_pointer+1)] 
         column_pointer += 1 # This is the new block to focus on 

        # if there is a non-empty square above, and they are the same 
        elif board[(i, j)] == board[(i, column_pointer)]: 
         board[(i, column_pointer)] += board[(i, j)] 
         board[(i, j)] = 0 
         score += board[(i, column_pointer)] + math.log(board[(i, column_pointer)], 2) 
    elif move == 'd': 
     for i in range(row_size): # rows 
      column_pointer = row_size-1 
      for j in reversed(range(row_size-1)): # go left through the columns 
       if board[(i, j)] != 0: 
        # if there is a non-empty square above, and this is a zero 
        if board[(i, column_pointer)] == 0: 
         board[(i, column_pointer)] = board[(i, j)] 
         board[(i, j)] = 0 

        # if there is a non-empty square above, and they are not equivalent 
        elif board[(i, j)] != board[(i, column_pointer)]: 
         board[(i, column_pointer-1)], board[(i, j)] = board[(i, j)], board[(i, column_pointer-1)] 
         column_pointer -= 1 # This is the new block to focus on 

        # if there is a non-empty square above, and they are the same 
        elif board[(i, j)] == board[(i, column_pointer)]: 
         board[(i, column_pointer)] += board[(i, j)] 
         board[(i, j)] = 0 
         score += board[(i, column_pointer)] + math.log(board[(i, column_pointer)], 2) 

    else: 
     print("something is wrong") 
    NN.score += score 
    return score 


# checks to see whether there are any valid moves in a full board with no 0's 
def is_game_over(): 
    # check the top-left square 
    for i in range(row_size-1): 
     for j in range(row_size-1): 
      if board[(i,j)] in [board[(i+1,j)], board[(i,j+1)]]: # check the one below and to the right 
       return False 
    # Check the right-most column 
    for j in range(row_size-1): 
     if board[(row_size-1,j)] == board[(row_size-1,j+1)]: 
      return False 
    # Check the bottom row 
    for i in range(row_size-1): 
     if board[(i,row_size-1)] == board[(i+1,row_size-1)]: 
      return False 
    # There is no way to combine, game over 
    return True 


# NN controls 
NN = NeuralNetwork(HP) 

for step in range(10): 
    # set up game board 
    for i in range(row_size): # row 
     for j in range(row_size): #column 
      board[(i,j)] = 0 

    previous_board = [] 
    quit = False 
    # game loop 
    while not quit: 
     # set a new empty tile to a 2 
     while True: 
      i = random.randint(0,row_size-1) 
      j = random.randint(0,row_size-1) 
      # print(i,j,board[(i,j)]) 
      if board[(i,j)] != 0: continue 
      else: board[(i,j)] = 2 ; break 


     # View 
     # display() 


     # normalize data and make a guess with nn 
     state = np.array([board[(i,j)] for j in range(row_size) for i in range(row_size)]) 
     state[state==0] = 1 
     state = np.log2(state) 
     state = state/np.max(state) 
     # print('\n'.join(['\t'.join([str(state[j*row_size+i]) for j in range(row_size)])for i in range(row_size)])) 
     move = NN.feed(state) 


     # move 
     reward = 0 
     previous_board = list(board.values()) 
     while True: 
      if len(move[move == 0]) == 4: 
       if is_game_over(): 
        # print("Game Over") 
        quit = True 
        break 
      reward = logic("asdw"[move.argmax()], NN) 
      if previous_board == list(board.values()): move[move.argmax()] = 0 ; continue 
      else: break 

     if reward: 
      reward = nonlin(math.log2(reward)-math.log2(2048)) 
      move[np.argmax(move)] += reward 
      NN.train(state, move) 
    display() 


    print("score: " + str(NN.score)) 

    NN.score = 0 

有人告诉我,numpy的会知道该怎么做,当它遇到了两个一维阵列点缀,但没有发生。我应该使这些数组2D,其内部维度为1?你能帮忙吗?

这里是完整的错误:

Traceback (most recent call last): 
    File "2048.py", line 195, in <module> 
    NN.train(state, move) 
    File "/home/jeff/Programs/grad_descent/NeuralNetwork.py", line 71, in train 
    print("dot: ", self.layers[-(j+1)].T.dot(delta).shape) 
ValueError: shapes (9,) and (4,) not aligned: 9 (dim 0) != 4 (dim 0) 

正如你可以看到,他们是一维向量,所以numpy的应该只是他们点。

+0

媒介仍需要具有相同的尺寸,以点积他们。你是想要内部产品,还是外部产品(你期望结果是单个数字还是9x4矩阵)? –

+0

是的,我试图从他们的点上制作一个9x4的ndarray。 – Jeff

+1

然后它听起来像你想要的[外部产品](https://docs.scipy.org/doc/numpy/reference/generated/numpy.outer.html),而不是内在产品。 –

回答

0

如果您使用np.newaxis给出明确的1-D列表示法,它将起作用。

注意:如果您正在寻找标量输出,则这两个向量需要为equal length。 OP中的错误消息显示您正在尝试使用长度为9的点积和长度为4的向量。我假设你真的想要.dot()返回一个外部产品。如果没有,内部产品将无法正常工作 - 在这种情况下,试着找出为什么你没有得到两个等长的向量,你期望看到它们。

随着:

a = np.array([1,2,3]) 
b = np.array([2,3,4,5]) 

分别的a形状和b(3,)(4,),:

try: 
    print(a.shape) 
    print(b.shape) 
    print("a.b: \n{}".format(np.dot(a,b.T))) 
except ValueError as e: 
    print("failed: {}".format(e)) 

输出:

(3,) 
(4,) 
failed: shapes (3,) and (4,) not aligned: 3 (dim 0) != 4 (dim 0) 

随着newaxis,形状变得(3,1)(4,1)

aa = a[:, np.newaxis] 
bb = b[:, np.newaxis] 

try: 
    print(aa.shape) 
    print(bb.shape) 
    print("aa.bb: \n{}".format(np.dot(aa,bb.T))) 
except ValueError as e: 
    print("failed: {}".format(e)) 

输出:

(3, 1) 
(4, 1) 
aa.bb: 
[[ 2 3 4 5] 
[ 4 6 8 10] 
[ 6 9 12 15]] 
相关问题