2017-08-21 121 views
0

我正在尝试实现针对有向图的深度优先搜索(DFS)算法,如Cormen等人Introduction to Algorithms (3rd ed.)中所述。这里是我的执行至今:深度优先搜索中的Python“RuntimeError:最大递归深度”

import pytest 
from collections import OrderedDict 
import copy 


class Node(object): 
    def __init__(self, color='white', parent=None, d=None, f=None): 
     self.color = color 
     self.parent = parent 
     self.d = d   # Discovery time 
     self.f = f   # Finishing time 


class Graph(object): 
    def __init__(self, edges, node_indices=None): 
     self.edges = edges 
     self.nodes = self.initialize_nodes(node_indices) 
     self.adj = self.initialize_adjacency_list() 

    def initialize_nodes(self, node_indices=None): 
     if node_indices is None: 
      node_indices = sorted(list(set(node for edge in self.edges for node in edge))) 
     return OrderedDict([(node_index, Node()) for node_index in node_indices]) 

    def initialize_adjacency_list(self): 
     A = {node: [] for node in self.nodes} 
     for edge in self.edges: 
      u, v = edge 
      A[u].append(v) 
     return A 

    def dfs(self): 
     self.time = 0 
     for u, node in self.nodes.items(): 
      if node.color == 'white': 
       self.dfs_visit(u) 

    def dfs_visit(self, u): 
     self.time += 1 
     self.nodes[u].d = self.time 
     self.nodes[u].color = 'gray' 
     for v in self.adj[u]: 
      if self.nodes[v].color == 'white': 
       self.nodes[v].parent = u 
       self.dfs_visit(v) 
     self.nodes[u].color = 'black' 
     self.time += 1 
     self.nodes[u].f = self.time 

    @staticmethod 
    def transpose(edges): 
     return [(v,u) for (u,v) in edges] 

    def strongly_connected_components(self): 
     self.dfs() 
     finishing_times = {u: node.f for u, node in self.nodes.items()} 
     self.__init__(self.transpose(self.edges)) 
     node_indices = sorted(finishing_times, key=finishing_times.get, reverse=True) 
     self.nodes = self.initialize_nodes(node_indices) 
     self.dfs() 
     return self.trees() 

    def trees(self): 
     _trees = [] 
     nodes = copy.deepcopy(self.nodes) 
     while nodes: 
      for u, node in nodes.items(): 
       if node.parent is None: 
        _trees.append([u]) 
        nodes.pop(u) 
       else: 
        for tree in _trees: 
         if node.parent in tree: 
          tree.append(u) 
          nodes.pop(u) 
     return _trees 

为了测试它的作品,我已经采取了一个例子,从书中的图22.9:

enter image description here

一个重命名节点后ħ18分别我跑了以下测试:

def test_strongly_connected_components(): 
    edges = [(1,2), (5,1), (2,5), (5,6), (2,6), (6,7), (7,6), (2,3), (3,7), (3,4), (4,3), (4,8), (7,8), (8,8)] 
    graph = Graph(edges) 
    assert graph.strongly_connected_components() == [[1, 5, 2], [3, 4], [6, 7], [8]] 


if __name__ == "__main__": 
    pytest.main([__file__+"::test_strongly_connected_components", "-s"]) 

该测试通过,确认图中灰色SCC。然而,对于'真正'的练习,我需要使用一个输入文件SCC.txt,其中包含875,714行代表边缘(作为头尾对整数),并输出五个最大SCC的大小。为此我尝试以下测试:

@pytest.fixture 
def edges(): 
    with open('SCC.txt') as f: 
     return [tuple(map(int, line.split())) for line in f.read().splitlines()] 

def test_SCC_on_full_graph(edges): 
    graph = Graph(edges) 
    SCCs = graph.strongly_connected_components() 
    print([map(len, SCCs)].sort(reverse=True))  # Read off the size of the largest SCCs 


if __name__ == "__main__": 
    pytest.main([__file__+"::test_SCC_on_full_graph", "-s"]) 

不过,我碰到一个RuntimeError: maximum recursion depth exceeded in cmp

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <scc.Graph object at 0x103253690>, u = 209099 

    def dfs_visit(self, u): 
      self.time += 1 
      self.nodes[u].d = self.time 
      self.nodes[u].color = 'gray' 
      for v in self.adj[u]: 
>     if self.nodes[v].color == 'white': 
E     RuntimeError: maximum recursion depth exceeded in cmp 

scc.py:53: RuntimeError 
========================== 1 failed in 21.79 seconds =========================== 

我读过关于增加sys.setrecursionlimit,但这似乎并没有被推荐的做法。除了我不知道如何改进代码,因为它相当字面上实现了书中给出的伪代码。关于如何克服这个错误的任何想法?

回答

1

我设法解决问题,使用threading库增加stack_size和递归限制。以下是解决方案的代码:

import sys 
import pytest 
from collections import OrderedDict 
import copy 
import threading 


class Node(object): 
    def __init__(self, color='white', parent=None, d=None, f=None): 
     self.color = color 
     self.parent = parent 
     self.d = d   # Discovery time 
     self.f = f   # Finishing time 


class Graph(object): 
    def __init__(self, edges, node_indices=None): 
     self.edges = edges 
     self.nodes = self.initialize_nodes(node_indices) 
     self.adj = self.initialize_adjacency_list() 
     self.trees = dict() 

    def initialize_nodes(self, node_indices=None): 
     if node_indices is None: 
      node_indices = sorted(list(set(node for edge in self.edges for node in edge))) 
     return OrderedDict([(node_index, Node()) for node_index in node_indices]) 

    def initialize_adjacency_list(self): 
     A = {node: [] for node in self.nodes} 
     for edge in self.edges: 
      u, v = edge 
      A[u].append(v) 
     return A 

    def dfs(self): 
     self.time = 0 
     for u, node in self.nodes.items(): 
      if node.color == 'white': 
       self.dfs_visit(u, root=u) 

    def dfs_visit(self, u, root=None): 
     if u == root: 
      self.trees[root] = set() 
     self.time += 1 
     self.nodes[u].d = self.time 
     self.nodes[u].color = 'gray' 
     for v in self.adj[u]: 
      if self.nodes[v].color == 'white': 
       self.nodes[v].parent = u 
       self.trees[root].add(v) 
       self.dfs_visit(v, root=root) 
     self.nodes[u].color = 'black' 
     self.time += 1 
     self.nodes[u].f = self.time 

    @staticmethod 
    def transpose(edges): 
     return [(v,u) for (u,v) in edges] 

    def strongly_connected_components(self): 
     self.dfs() 
     finishing_times = {u: node.f for u, node in self.nodes.items()} 
     self.__init__(self.transpose(self.edges)) 
     node_indices = sorted(finishing_times, key=finishing_times.get, reverse=True) 
     self.nodes = self.initialize_nodes(node_indices) 
     self.dfs() 

     trees = copy.deepcopy(self.trees) 
     for k, v in trees.items(): 
      v.add(k) 
     return trees.values() 


@pytest.fixture 
def edges(): 
    with open('SCC.txt') as f: 
     return [tuple(map(int, line.split())) for line in f.read().splitlines()] 

def SCC_on_full_graph(): 
    E = edges() 
    graph = Graph(E) 
    SCCs = graph.strongly_connected_components() 

    SCC_sizes = sorted(list(map(len, SCCs)), reverse=True) 
    print(SCC_sizes[:5])       # Read off the size of the 5 largest SCCs 


if __name__ == "__main__": 
    threading.stack_size(67108864) 
    sys.setrecursionlimit(2**20) 
    thread = threading.Thread(target=SCC_on_full_graph) 
    thread.start() 
1

DFS必须在逻辑上是DFS,但通过编程方式您可以尝试解决。

  1. 中,这样你可以从顶部功能之一再试,如果达到一个接近递归限制的方式写的DFS。

  2. 尝试使用多处理。

PS: 对于较大的数据集有可能发生无限递归吗?当使用较大的数据集时出现逻辑错误。 如果您拥有增量大小的数据集,您还可以确定在python中执行算法的限制。