
I got the idea of implementing my own version of deep feature selection from the paper here: http://link.springer.com/chapter/10.1007%2F978-3-319-16706-0_20. Why doesn't the lasso give me any zero coefficients here?

According to the paper, the basic idea of deep feature selection is to add a one-to-one mapping layer before any fully connected hidden layers, and then to produce zeros among the input-layer weights by adding a regularization term (either lasso or elastic net) to the cost; the combined penalty is written out below.
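
With input-layer weights $w$, the penalty on the input layer used in the code below has the elastic-net form (the notation is mine, matching the lambda1/lambda2 parameters of the code rather than the paper's symbols):

$$\lambda_1\left(\lambda_2\,\lVert w\rVert_1 + \frac{1-\lambda_2}{2}\,\lVert w\rVert_2^2\right)$$

The $\lVert w\rVert_1$ term is the lasso part that is expected to drive individual input weights to exactly zero; with lambda2=1.0, as in the defaults below, the penalty is pure lasso.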

My problem is that even though I seem to have implemented the deep feature selection framework correctly, testing it on random data generated by numpy.random.rand(1000, 50) fails to give me any zeros among the input-layer weights. Is this a common thing with lasso-style regularization? Should I tune the parameters I use for this framework (maybe even more epochs)? Or is something wrong with my code?

import numpy as np
import tensorflow as tf


class DeepFeatureSelectionMLP:
    def __init__(self, X, Y, hidden_dims=[100], epochs=1000,
                 lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0,
                 learning_rate=0.1):
     # Initialize the input layer

     # Get the dimension of the input X 
     n_sample, n_feat = X.shape 
     n_classes = len(np.unique(Y)) 

     # One hot Y 
     one_hot_Y = np.zeros((len(Y), n_classes)) 
     for i,j in enumerate(Y): 
      one_hot_Y[i][j] = 1 

     self.epochs = epochs 

     Y = one_hot_Y 

     # Store the original values
     self.X = X 
     self.Y = Y 

     # Two placeholders with undetermined batch size are created
     self.var_X = tf.placeholder(dtype=tf.float32, shape=[None, n_feat], name='x') 
     self.var_Y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='y') 

     self.input_layer = One2OneInputLayer(self.var_X) 

     self.hidden_layers = [] 
     layer_input = self.input_layer.output 

     # Create hidden layers 
     for dim in hidden_dims: 
      self.hidden_layers.append(DenseLayer(layer_input, dim)) 
      layer_input = self.hidden_layers[-1].output 

     # Final classification layer, variable Y is passed 
     self.softmax_layer = SoftmaxLayer(self.hidden_layers[-1].output, n_classes, self.var_Y) 

     n_hidden = len(hidden_dims) 

     # regularization terms on coefficients of input layer 
     self.L1_input = tf.reduce_sum(tf.abs(self.input_layer.w)) 
     self.L2_input = tf.nn.l2_loss(self.input_layer.w) 

     # regularization terms on weights of hidden layers   
     L1s = [] 
     L2_sqrs = [] 
     for i in xrange(n_hidden): 
      L1s.append(tf.reduce_sum(tf.abs(self.hidden_layers[i].w))) 
      L2_sqrs.append(tf.nn.l2_loss(self.hidden_layers[i].w)) 

     L1s.append(tf.reduce_sum(tf.abs(self.softmax_layer.w))) 
     L2_sqrs.append(tf.nn.l2_loss(self.softmax_layer.w)) 

     self.L1 = tf.add_n(L1s) 
     self.L2_sqr = tf.add_n(L2_sqrs) 

     # Cost with two regularization terms 
     self.cost = self.softmax_layer.cost \ 
        + lambda1*(1.0-lambda2)*0.5*self.L2_input + lambda1*lambda2*self.L1_input \ 
        + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2*self.L1 

     self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost) 

     self.y = self.softmax_layer.y 

    def train(self, batch_size=100): 
     sess = tf.Session()
     # Note: tf.initialize_all_variables() is the TF 0.x API;
     # later versions use tf.global_variables_initializer()
     sess.run(tf.initialize_all_variables())

     for i in xrange(self.epochs): 
      x_batch, y_batch = get_batch(self.X, self.Y, batch_size) 
      sess.run(self.optimizer, feed_dict={self.var_X: x_batch, self.var_Y: y_batch}) 
      if (i + 1) % 50 == 0: 
       l = sess.run(self.cost, feed_dict={self.var_X: x_batch, self.var_Y: y_batch}) 
       print('epoch {0}: global loss = {1}'.format(i + 1, l))
       self.selected_w = sess.run(self.input_layer.w) 
       print(self.selected_w) 

class One2OneInputLayer(object): 
    # One to One Mapping! 
    def __init__(self, input): 
     """ 
      The second dimension of the input, 
      for each input, each row is a sample 
      and each column is a feature, since 
      this is one to one mapping, n_in equals 
      the number of features 
     """ 
     n_in = input.get_shape()[1].value 

     self.input = input 

      # Initialize the one-to-one weights, one scalar per feature
      w = tf.Variable(tf.zeros([n_in,]), name='w')

     self.w = w 
     self.output = self.w * self.input 
     self.params = [w] 

class DenseLayer(object): 
    # Canonical dense layer 
    def __init__(self, input, n_out, activation='sigmoid'): 
     """ 
      The second dimension of the input, 
      for each input, each row is a sample 
      and each column is a feature, since 
      this is one to one mapping, n_in equals 
      the number of features 

      n_out defines how many nodes are there in the 
      hidden layer 
     """ 
     n_in = input.get_shape()[1].value 
     self.input = input 

      # Initialize the weights and biases for this hidden layer.
      # Random init replaces tf.ones here: with all-ones weights
      # every hidden unit starts out computing the same function.
      w = tf.Variable(tf.random_normal([n_in, n_out]), name='w')
      b = tf.Variable(tf.zeros([n_out]), name='b')

     output = tf.add(tf.matmul(input, w), b) 
     output = activate(output, activation) 

     self.w = w 
     self.b = b 
     self.output = output 
      self.params = [w, b]

class SoftmaxLayer(object): 
    def __init__(self, input, n_out, y): 
     """ 
      The second dimension of the input, 
      for each input, each row is a sample 
      and each column is a feature, since 
      this is one to one mapping, n_in equals 
      the number of features 

      n_out defines how many nodes are there in the 
      hidden layer 
     """ 
     n_in = input.get_shape()[1].value 
     self.input = input 

      # Initialize the weights and biases for this layer
     w = tf.Variable(tf.random_normal([n_in, n_out]), name='w') 
     b = tf.Variable(tf.random_normal([n_out]), name='b') 

     pred = tf.add(tf.matmul(input, w), b) 

      cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))

     self.y = y 
     self.w = w 
     self.b = b 
     self.cost = cost 
      self.params = [w, b]
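
The code above calls two helpers, get_batch and activate, that are not shown in the question. A minimal sketch of what they presumably look like (my assumption, not the original implementation):

def get_batch(X, Y, batch_size):
    # Draw a random mini-batch of corresponding rows from X and Y
    idx = np.random.choice(len(X), batch_size, replace=False)
    return X[idx], Y[idx]

def activate(linear, activation):
    # Apply the named activation to the pre-activation tensor
    if activation == 'sigmoid':
        return tf.nn.sigmoid(linear)
    elif activation == 'tanh':
        return tf.nn.tanh(linear)
    elif activation == 'relu':
        return tf.nn.relu(linear)
    return linear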

You generate the data from a uniform distribution. You could try artificially creating outliers, for example 500 rows with values between 1000 and 1001 instead of between 0 and 1; then the algorithm might assign a zero weight (I don't know what lasso and deep feature selection are). I also don't see in the code how the 1000 values are generated; could you also post the code that generates and uses them? A sketch of such data is below.
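
As a concrete version of this suggestion (my sketch; the 500/500 split and the labels are arbitrary choices):

import numpy as np

# Uniform data in [0, 1), as in the question
X = np.random.rand(1000, 50)
# Artificial outliers: the first 500 rows take values in [1000, 1001)
X[:500] = 1000.0 + np.random.rand(500, 50)
# Labels that mark the outlier rows, so the split is learnable
Y = np.array([1] * 500 + [0] * 500)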


You could also try some simple examples where you know the answer, like a 5x5 matrix where you can enter the values by hand and know the expected result.

Answer


Gradient descent algorithms such as Adam do not give exact zeros when using L1 regularization. Instead, optimizers that handle the L1 penalty proximally, such as FTRL or Proximal Adagrad, can give you exact zeros. A sketch of the swap is below.
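
For example, the questioner's code could swap the optimizer like this (a sketch: tf.train.FtrlOptimizer applies L1/L2 penalties proximally to every trainable variable, so the lambda/alpha terms would move out of self.cost and into the optimizer, and the penalty is no longer restricted to the input layer as in the paper):

# Instead of
# self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)
self.optimizer = tf.train.FtrlOptimizer(
    learning_rate=learning_rate,
    l1_regularization_strength=lambda1 * lambda2,        # proximal step gives exact zeros
    l2_regularization_strength=lambda1 * (1.0 - lambda2)
).minimize(self.softmax_layer.cost)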
