2017-04-12 39 views
1

我有一些序列到序列(sequence-to-sequence)场景下的训练样例,它们以 tf.train.SequenceExample 的形式、通过 TFRecordWriter 写入到一个(或多个)文件中。我想读取并解码它们,然后把它们打乱(shuffle)、填充(pad)成批量送入我的网络。我参考了文档和一些教程,但始终没能做出来。下面是我正在编写的一个独立示例。——配批、打乱并填充 tf.train.SequenceExample

import random 

import tensorflow as tf 

from six.moves import xrange 


# Synthetic-data parameters.
MIN_LEN = 6          # minimum length of a generated input sequence
MAX_LEN = 12         # maximum length of a generated input sequence
NUM_EXAMPLES = 20    # number of SequenceExamples written to the TFRecord file
BATCH_SIZE = 3       # examples per training batch
PATH = 'ciaone.tfrecords'  # destination TFRecord file
# Input-pipeline queue parameters; CAPACITY follows the formula recommended
# in the tf.train.shuffle_batch documentation.
MIN_AFTER_DEQUEUE = 10
NUM_THREADS = 2
SAFETY_MARGIN = 1
CAPACITY = MIN_AFTER_DEQUEUE + (NUM_THREADS + SAFETY_MARGIN) * BATCH_SIZE


def generate_example():
    """Build one synthetic (input, output) training pair.

    The input is a random-length sequence of random integers in [0, 10];
    the output keeps only those input items that are greater than or equal
    to the input's average.  Useful purely for quick visual inspection.
    """
    size = random.randint(MIN_LEN, MAX_LEN)
    sequence = [random.randint(0, 10) for _ in range(size)]
    mean = float(sum(sequence)) / len(sequence)
    return sequence, [value for value in sequence if value >= mean]


def encode(input_, output):
    """Serialize an (input, output) pair as a tf.train.SequenceExample.

    The sequence length goes into the context as an int64 'length' feature;
    the two sequences become the 'input' and 'output' feature lists, one
    int64 feature per element.
    """
    def int64_feature(value):
        # Wrap a single integer as an int64 Feature proto.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    context = tf.train.Features(
        feature={'length': int64_feature(len(input_))})
    feature_lists = tf.train.FeatureLists(
        feature_list={
            'input': tf.train.FeatureList(
                feature=[int64_feature(item) for item in input_]),
            'output': tf.train.FeatureList(
                feature=[int64_feature(item) for item in output]),
        })
    return tf.train.SequenceExample(
        context=context, feature_lists=feature_lists)


def decode(example):
    """Parse one serialized SequenceExample into its two sequence tensors.

    Returns the 'input' and 'output' variable-length int64 tensors; the
    context 'length' feature is parsed but not returned.
    """
    _, sequences = tf.parse_single_sequence_example(
        example,
        context_features={
            'length': tf.FixedLenFeature([], tf.int64),
        },
        sequence_features={
            'input': tf.FixedLenSequenceFeature([], tf.int64),
            'output': tf.FixedLenSequenceFeature([], tf.int64),
        })
    return sequences['input'], sequences['output']

if __name__ == '__main__':
    # STEP 1 -- write a small synthetic dataset of SequenceExamples.
    with tf.python_io.TFRecordWriter(PATH) as writer:
        for _ in xrange(NUM_EXAMPLES):
            record = encode(*generate_example())
            writer.write(record.SerializeToString())

    # STEP 2 -- read, decode, shuffle, pad and batch the examples.
    with tf.Session() as sess:
        # num_epochs=1 makes the pipeline finite so OutOfRangeError fires
        # when the data is exhausted and the loop below can terminate.
        queue = tf.train.string_input_producer([PATH], num_epochs=1)
        reader = tf.TFRecordReader()
        _, value = reader.read(queue)
        inputs = decode(value)

        # tf.train.shuffle_batch cannot pad variable-length tensors, and
        # tf.train.batch(dynamic_pad=True) does not shuffle.  Workaround:
        # route the decoded tensors through a RandomShuffleQueue first,
        # then batch the (shuffled) output with dynamic_pad=True.
        dtypes = [tensor.dtype for tensor in inputs]
        shapes = [tensor.get_shape() for tensor in inputs]
        shuffle_queue = tf.RandomShuffleQueue(
            CAPACITY, MIN_AFTER_DEQUEUE, dtypes)
        enqueue_op = shuffle_queue.enqueue(inputs)
        qr = tf.train.QueueRunner(shuffle_queue, [enqueue_op] * NUM_THREADS)
        tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, qr)
        shuffled = shuffle_queue.dequeue()
        # dequeue() loses the static shape information; restore it so
        # tf.train.batch can pad the tensors.
        for tensor, shape in zip(shuffled, shapes):
            tensor.set_shape(shape)
        batch_input, batch_output = tf.train.batch(
            shuffled, batch_size=BATCH_SIZE, capacity=CAPACITY,
            dynamic_pad=True)

        # Initializers must run before the queue runners start (the
        # string_input_producer's epoch counter is a local variable).
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        try:
            while not coord.should_stop():
                batch_in, batch_out = sess.run([batch_input, batch_output])
                print(batch_in)
                print(batch_out)
        # NOTE: 'as e' (not 'except E, e') so the script also parses on
        # Python 3; the file already targets 2/3 compatibility via six.
        except tf.errors.OutOfRangeError as e:
            coord.request_stop(e)
        finally:
            coord.request_stop()
            coord.join(threads)

任何人都可以告诉我如何继续? 在此先感谢!

P.S.作为一个方面的请求:任何关于资源的指针能够更好地理解TensorFlow的输入流水线API,这是值得赞赏的。

回答

1

如果您处理的是 Example 而不是 SequenceExample,那么只需在解码得到的张量上调用一次 tf.train.shuffle_batch 即可。

_, value = reader.read(queue)
input_, output = decode(value)
# Fixed: the keyword is 'min_after_dequeue' (was misspelled
# 'min_after_sequeue', which raises TypeError).
batch_input, batch_output = tf.train.shuffle_batch(
    [input_, output],
    batch_size=BATCH_SIZE,
    capacity=CAPACITY,
    min_after_dequeue=MIN_AFTER_DEQUEUE)

但是,shuffle_batch 要求传入的张量具有静态形状,而这里并不满足。对于可变形状的张量,您可以改用 tf.train.batch 并设置 dynamic_pad=True。它会为您处理批处理(和填充),但不会打乱样例的顺序。不幸的是,shuffle_batch 不接受 dynamic_pad 参数。

有一个变通方法(described here):在调用 tf.train.batch 之前,先把张量送入一个 RandomShuffleQueue:

inputs = decode(value)
dtypes = list(map(lambda x: x.dtype, inputs))
shapes = list(map(lambda x: x.get_shape(), inputs))
queue = tf.RandomShuffleQueue(CAPACITY, MIN_AFTER_DEQUEUE, dtypes)
enqueue_op = queue.enqueue(inputs)
qr = tf.train.QueueRunner(queue, [enqueue_op] * NUM_THREADS)
tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, qr)
inputs = queue.dequeue()
# dequeue() drops static shape info; restore it for dynamic_pad batching.
for tensor, shape in zip(inputs, shapes):
    tensor.set_shape(shape)

# Now you can use tf.train.batch with dynamic_pad=True, and the order in which
# it enqueues elements will be permuted because of RandomShuffleQueue.
# Fixed: use the module constants (the lowercase 'batch_size'/'capacity'/'name'
# names were undefined in this script).
batch_input, batch_output = tf.train.batch(
    inputs, batch_size=BATCH_SIZE, capacity=CAPACITY, dynamic_pad=True)

这里有一个实现此模式的例子(在谷歌的 Magenta 项目中)。

+0

这正是我对“示例”所做的,但仍然需要弄清楚如何处理SequenceExamples。感谢您指出github问题! – petrux