2013-10-01 58 views
0

排序比方说,我有以下的有序列表:合并有间隙

a = ['8EF5CD1B', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222'] 
b = ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', '4EFA3222'] 
c = ['96276D30', 'B1392DB3', 'BD23F32A', '59770CD6'] 

我希望他们通过合并从低优先级列表填补空白排序。

>>> from itertools import permutations 
>>> LISTS = (a, b, c) 
>>> for (first, second) in permutations(LISTS, 2): 
...  print((LISTS.index(first), LISTS.index(second)), magic(first, second)) 
... 
(0, 1) ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222'] 
(0, 2) ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222'] 
(1, 0) ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222'] 
(1, 2) ['8EF5CD1B', '96276D30', 'B1392DB3', 'BD23F32A', '59770CD6', '4EFA3222'] 
(2, 0) ['96276D30', '8EF5CD1B', 'B1392DB3', 'BD23F32A', '59770CD6', '4EFA3222'] 
(2, 1) ['8EF5CD1B', '96276D30', 'B1392DB3', 'BD23F32A', '59770CD6', '4EFA3222'] 
>>>magic(*LISTS) 
['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222'] 

正如你可以看到(0,1)96276D30去,因为那里是由b名单有填补了一个空白的第二位。如果有订单,优先顺序是第一个参数。魔术功能应该与两个以上的参数一起工作,就像上面的例子一样。我编写了一个可行的代码,但对于这样一个看起来很简单的任务来说,它是丑陋的(可能太慢)。

MAX_ITERATIONS = 1000 
class UnjoinableListsError(Exception): pass 

def magic(*lists, iterations=MAX_ITERATIONS): 
    """ 
    Returns a joint sorted list of presorted lists (or tuples). 

    First it checks for common items, then it defines a gap list to put 
    non-commons in. Finally it mixes them all. If items of more presorted 
    list (or tuple) competes for a gap place, they will sorted in order 
    of their parents were in arguments. 
    """ 
    def sort_two(first, second): 
     commons = [item for item in first if item in second] 
     gap_list = [[] for i in range(len(commons)+1)] 
     for l in (first, second): 
      gap_item = [] 
      sliced = [] 
      for common_item in commons: 
       common_i = l.index(common_item) 
       sliced.append((list(l[:common_i]), list(l[common_i+1:]))) 
      gap_item.append(sliced[0][0]) 
      for j in range(len(sliced) - 1): 
       gap_item.append([item for item in sliced[j][1] 
            if item in sliced[j+1][0]]) 
      gap_item.append(sliced[-1][1]) 
      for j, item in enumerate(gap_item): 
       gap_list[j].extend([i for i in item if i not in commons]) 
     result = [] 
     result.extend(gap_list[0]) 
     for i in range(len(commons)): 
      result.append(commons[i]) 
      result.extend(gap_list[i+1]) 
     return result 

    result = lists[0] 
    index_set = {i for i in range(1, len(lists))} 
    it = iterations 
    while index_set and it > 0: 
     it -= 1 
     if it == 0: 
      raise UnjoinableListsError('The lists at argument index {}'+ 
       'are unjoinable.'.format(str(index_set))) 
     i = index_set.pop() 
     try: 
      result = sort_two(result, lists[i]) 
     except: 
      index_set.add(i) 
    return result 

有一些清晰而简单的解决方案我错过了什么?感谢您的回答。

+0

_“比方说,我有以下的有序列表:” _。你确定他们是排序?在列表a中,'59770CD6'在'BD23F32A'之前。在列表c中,'59770CD6'在'BD23F32A'之后。 – Kevin

+0

这些是对象引用的crc32哈希值。是的,他们是预先分类的。否则,我可以很容易地使用'heapq.merge()'。 – SzieberthAdam

回答

0

好吧,没有答案让我重做这件事。这里是一个很好地工作代码:

def joint_sorted(*sequences): 
    """Sorts two or more presorted sequences. The priority is in 
decreasing order for the case of unambiguous elem order. 

>>> joint_sorted([1,3,4,5,6], [1,2,4,6,7], [6, 10, 11], [12, 11, 17]) 
[1, 3, 2, 4, 5, 6, 7, 10, 12, 11, 17] 
>>> joint_sorted('adgth', 'dbgjhk') 
'adbgtjhk'""" 
    def for_two(first_seq, second_seq): 
     first_set, second_set = set(first_seq), set(second_seq) 
     if (len(first_seq) != len(first_set) 
      or len(second_seq) != len(second_set)): 
      raise TypeError("The sequences must contain " 
       "unique elems only!") 
     common_elems = first_set & second_set 
     before, buf = {}, [] 
     for i, e in iter(enumerate(second_seq)): 
      if e in common_elems: 
       before[e], buf = buf, [] 
      else: 
       buf.append(e) 
     result = [] 
     for e in first_seq: 
      if e in before: 
       result.extend(before[e]) 
      result.append(e) 
     result.extend(buf) 
     if isinstance(first_seq, str): 
      return ''.join(result) 
     return first_seq.__class__(result) 
    first_seq = sequences[0] 
    for i in range(1, len(sequences)): 
     first_seq = for_two(first_seq, sequences[i]) 
    return first_seq