2013-05-10 52 views
1

我想把一大段文本拆分成文本块和文本列表,拆分所用的关键字是根据邮件头设置的。我认为最好的办法是递归。不幸的是,在尝试检查某个变量的类型时,我收到了以下错误:*** TypeError: 'str' object is not callable。当我在PDB中直接调用type(var)时,也会收到同样的错误。这似乎说不通,所以我担心是某个显而易见的问题我没有看到。Python TypeError:调用type函数时'str'对象不可调用

以下是我认为相关的那部分代码。如果你觉得需要看更多,请告诉我。

def separate(text,boundary = None): 
    """Split `text` once for every ``boundary=`` value found inside it.

    When `boundary` is None, every match of ``(?<=boundary=).*`` in `text`
    is collected and the working list (initially ``[text]``) is passed
    through recursiveSplit() once per match, in order of appearance.
    Returns the working list after the last pass.

    NOTE(review): `textList` is only assigned inside the
    ``boundary == None`` branch, so calling this with an explicit
    `boundary` reaches ``return textList`` with the name unbound.
    """
    # Debugger breakpoint: every call stops here until the user continues.
    pdb.set_trace() 
    if boundary == None: 
     m = re.findall(r'(?<=boundary=).*',text) 
     i = 0 
     textList = [text] 
     while i < len(m): #have all levels of Boundary/headers named 
      boundary = m[i] 
      # The previous result is replaced by whatever recursiveSplit returns.
      textList = recursiveSplit(textList,boundary) 
      i += 1 

    return textList 

def recursiveSplit(chunk,boundary): 
    """Split a string chunk with re.split, visiting list chunks recursively.

    Returns the re.split() result when `chunk` is a string and None in
    every other case. For a list, each element is visited recursively but
    the results of those calls are not kept, so a list also yields None.

    NOTE(review): the full script binds the module-level name `type` to a
    string ("text only" / "MIME"), so ``type(chunk)`` below calls that
    string rather than the builtin.
    NOTE(review): the locals `object` and `list` reuse builtin names.
    """
    if type(chunk) is types.ListType: #error occurs here 
     for object in chunk: 
      # Return value of the recursive call is discarded.
      recursiveSplit(object,boundary) 
    if type(chunk) is types.StringType: 
     # NOTE(review): the pattern is a fixed literal whose named group
     # `boundary` is empty; the `boundary` argument is not used in it.
     list = re.split(r'(?P<boundary>)(?!--)',chunk) 
     return list 
    return None 

完整代码如下。运行它需要一个文本文件,可以使用任何MIME电子邮件。我也会上传我用来测试的文件。

#Textbasics email parser 
#based on a "show original" file converted into text 

from sys import argv 
import re, os, pdb, types 

script, filename = argv
# Read the whole message up front; the context manager closes the file
# handle instead of leaving it open for the life of the script.
with open(filename) as emailFile:
    text = emailFile.read()
# NOTE(review): this module-level name shadows the builtin type(); any later
# call to type(...) in this module invokes this string and raises
# "TypeError: 'str' object is not callable". Renaming it needs matching
# changes to the `if type == ...` checks at the bottom of the script.
type = "text only" #Set the default type of email

#cut the email up by sections
#--A section is defined as any time there are two line breaks in a row
textList = re.split(r"\n\n", text)
header = textList[0]
# A MIME-Version field in the first section switches the script to MIME mode
if re.search(r'MIME-Version',header):
    type = "MIME"

# If mail has no attachments, parse as a text-only email 
class Parser(object):
    """Parse a text-only email that has already been split on blank lines.

    ``textList[0]`` is taken as the header block; every later element is
    re-joined into the body. The Subject, From, To and Date values are
    pulled out of the header with regular expressions. A field whose
    pattern does not match is stored as an empty string instead of raising.
    """

    def __init__(self, textList):
        """Store `textList` and extract body and header fields from it.

        Raises IndexError if `textList` is empty.
        """
        self.textList = textList
        self.header = textList[0]
        # Every body section gets its blank-line separator back, the last
        # one included, so the body always ends with '\n\n' when non-empty.
        self.body = "".join(section + '\n\n' for section in textList[1:])

        self.subject = self._headerField(r'(?<=Subject:).*')
        self.fromVar = self._headerField(r'(?<=From:).*')
        self.toVar = self._headerField(r'(?<=To:).*')
        # Only matches three whitespace-separated words directly after "Date:"
        self.date = self._headerField(r'(?<=Date:)\w+\s\w+\s\w+')

    def _headerField(self, pattern):
        """Return the first match of `pattern` in the header, or "" if none."""
        found = re.search(pattern, self.header)
        return found.group(0) if found else ""

    def returnParsed(self, descriptor="all"):
        """Return one parsed field, or the whole message for "all".

        `descriptor` is one of "all", "subject", "fromVar", "toVar", "date"
        or "body"; any other value returns None.
        """
        if descriptor == "all":
            return ("Subject: " + self.subject + "\n"
                    + "From: " + self.fromVar + "\n"
                    + "To: " + self.toVar + "\n"
                    + "Date: " + self.date + "\n"
                    + "\n" + self.body)

        fields = {
            "subject": self.subject,
            "fromVar": self.fromVar,
            "toVar": self.toVar,
            "date": self.date,
            "body": self.body,
        }
        return fields.get(descriptor)

class MIMEParser(Parser): 
    """Builds NestedItem objects from a MIME message pre-split into text blocks.

    Does not call Parser.__init__, so the attributes that method assigns
    (header, body, subject, fromVar, toVar, date) are not set on instances.
    """

    class MIMEDataDecoder(object): 
     # Placeholder: the constructor accepts its arguments and does nothing.
     def __init__(self,decodeString,type): 
      pass  


    def __init__(self,textList): 
     """Wrap the first block as a "Header" item, then run organizeData()."""
     self.textList = textList 
     self.nestedItems = [] 
     newItem = NestedItem(self) 
     newItem.setContentType("Header") 
     newItem.setValue(self.textList[0]) 
     self.nestedItems.append(newItem) 
     # When the first block declares a boundary, add a second item holding
     # the same text, typed by the text between "Content-Type:" and ";".
     if re.search(r'(boundary=)',newItem.value): 
      helperItem = NestedItem(self) 
      helperItem.value = (self.textList[0]) 
      m = re.search(r'(?<=Content-Type:).+(?=;)',newItem.value) 
      # NOTE(review): m is None when no "Content-Type: ...;" is present,
      # in which case m.group(0) raises AttributeError.
      helperItem.setContentType(m.group(0)) 
      self.nestedItems.append(helperItem) 

     self.organizeData() 
     # The string literal below is a bare expression and is never executed;
     # it holds an earlier, disabled version of the item-building loop.
     """i = 0 
     while i < len(self.textList): 
      newItem = NestedItem(self) 
      ct = self.nextContentType 
      newItem.setContentType(ct) 
      newItem.setValue(self.textList[i]) 
      self.nestedItems.append(newItem) 
      m = re.search(r'(?<=Content-Type:).+(?=;)',self.textList[i]) 
      if m: 
       self.nextContentType = m.group(0) 
      i += 1 
      """ 

    def nestItem (self,item): 
     """Append `item` to this parser's top-level nestedItems list."""
     self.nestedItems.append(item) 

    def organizeData(self): 
     """Walk the text blocks and attach NestedItems to the current parent.

     Tracks nesting state in nestLevel, currentSuper, currentBoundary and
     currentList, and saves the previous state in formerObjectDatabase
     (keyed by nest level) whenever a block declares a new boundary.
     """
     self.nestLevel = 1 
     self.currentSuper = self 
     m = re.search(r'(?<=boundary=).*',self.textList[0]) 
     # NOTE(review): raises AttributeError if the first block has no "boundary=".
     self.currentBoundary = m.group(0) 
     # currentList is the same list object as self.textList, so the remove()
     # below also drops the first block from self.textList.
     self.currentList = self.textList 
     self.currentList.remove(self.textList[0]) 
     self.formerObjectDatabase = {} 
     # Debugger breakpoint: execution stops here until the user continues.
     pdb.set_trace() 
     # NOTE(review): the inner scan is repeated for as long as nestLevel
     # stays above 0 -- confirm this loop terminates for real input.
     while self.nestLevel > 0: 
      i = 0 
      while i < len(self.currentList): 

       boundary = self.currentBoundary 
       #If block is a "normal block", containing a current boundary identifier 
       # NOTE(review): this searches the module-level `text`, not
       # self.currentList[i], and the named group `boundary` in the pattern
       # is empty -- the local `boundary` value is not part of the pattern.
       p = re.search(r'--(?P<boundary>)(?!--)', text) 
       if p: 
        newItem = NestedItem(self.currentSuper) 
        newItem.setValue(self.currentList[i]) 
        r = re.search(r'(?<=Content-Type:).+(?=;)',newItem.value) 
        if r: 
         newItem.setContentType(r.group(0)) 
        self.currentObject = newItem 
        self.currentSuper.nestItem(self.currentObject) 
       #If the block contains a new block boundary 
       m = re.search(r'(?<=boundary=).*',self.currentList[i]) 
       if m: 
        #begin new layer of recursive commands 
        # Save the current list/parent/boundary under the current level
        # before descending one level.
        newFormerObject = self.FormerCurrentObject(self.currentList,self.currentSuper,self.currentBoundary) 
        self.formerObjectDatabase[self.nestLevel] = newFormerObject 
        # NOTE(review): self.currentObject is only assigned in the `if p`
        # branch above; it is unset here if that branch has never run.
        self.currentSuper = self.currentObject 
        self.nestLevel += 1 
        self.currentBoundary = m.group(0) 
        boundary = self.currentBoundary 
        #self.currentList = re.split(r'--(?P<boundary>)(?!--)', self.currentList[i]) 
       boundary = self.currentBoundary 
       #If block contains an "end of boundary" marker 
       # NOTE(review): also searches the module-level `text`; with the empty
       # named group the pattern matches any "--".
       q = re.search(r'(?P<boundary>)--', text) 
       if q: 
        self.nestLevel -= 1 
        # NOTE(review): raises KeyError when no state was saved under the
        # decremented level (e.g. level 0 when nothing was pushed).
        currentObject = self.formerObjectDatabase[self.nestLevel] 
        # Restores the saved state; currentList may change while `i` keeps counting.
        self.currentList = currentObject.formerList 
        self.currentSuper = currentObject.formerSuper 
        self.currentBoundary = currentObject.formerBoundary 
       i += 1      


    class FormerCurrentObject: 
     # Plain record of the list, parent and boundary that were current
     # before organizeData descended into a nested boundary.
     def __init__(self,formerList,formerSuper,formerBoundary): 
      self.formerList = formerList 
      self.formerSuper = formerSuper 
      self.formerBoundary = formerBoundary 




    def printAll(self): 
     """Print the number of top-level items, then each item via printOut()."""
     print "printing all: %d" % len(self.nestedItems) 
     i = 0 
     while i < len(self.nestedItems): 
      print "printing out item %d" % i 
      self.nestedItems[i].printOut() 
      i += 1 

class NestedItem(object):
    """One node in the tree of parsed message parts.

    Holds a reference to its parent (`superObject`), a content-type label,
    the raw text value, and a list of child NestedItems.
    """

    def __init__(self, superObject, contentType=" ", value=" "):
        self.superObject = superObject
        self.contentType = contentType
        self.value = value
        self.nestedItems = []

    def nestItem(self, item):
        """Append `item` as a child of this node."""
        self.nestedItems.append(item)

    def printOut(self, printBuffer=""):
        """Print this node and all descendants, indenting one space per level.

        `printBuffer` is the indentation prefix for this node's two lines.
        """
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2 and is also valid on Python 3.
        print(printBuffer + '++%s' % self.contentType)
        print(printBuffer + self.value)
        childBuffer = printBuffer + " "
        # Iterating directly guarantees termination; the previous index loop
        # never advanced its counter and spun forever on the first child.
        for child in self.nestedItems:
            child.printOut(childBuffer)

    def setContentType(self, contentType):
        """Replace the content-type label."""
        self.contentType = contentType

    def setValue(self, value):
        """Replace the raw text value."""
        self.value = value



# Plain-text path: `type` here is the module-level string chosen after the
# header was inspected, not the builtin.
if type == "text only": 
    p = Parser(textList) 
    print p.returnParsed() 

def separate(text, boundary=None):
    """Split `text` on one or more boundary strings.

    When `boundary` is None, every value following ``boundary=`` in `text`
    is collected and applied in order of appearance; otherwise only the
    given `boundary` is applied. Splitting is delegated to
    recursiveSplit(), starting from the one-element list ``[text]``.

    Returns the result of the last recursiveSplit() call, or ``[text]``
    when there is no boundary to apply.
    """
    if boundary is None:
        boundaries = re.findall(r'(?<=boundary=).*', text)
    else:
        boundaries = [boundary]

    # Initialised on every path so an explicit boundary no longer reaches
    # the return statement with the name unbound.
    textList = [text]
    for current in boundaries:
        textList = recursiveSplit(textList, current)
    return textList

def recursiveSplit(chunk, boundary):
    """Split `chunk` on `boundary`, descending into nested lists.

    A string is split wherever `boundary` occurs and is not immediately
    followed by ``--``. A list is mapped element by element, so the result
    mirrors the nesting of the input. Any other type returns None. An empty
    `boundary` leaves a string unsplit and returns ``[chunk]``.
    """
    # isinstance() keeps working even though the surrounding script rebinds
    # the module-level name `type` to a string, which is what made
    # ``type(chunk)`` fail with "'str' object is not callable".
    if isinstance(chunk, list):
        return [recursiveSplit(item, boundary) for item in chunk]
    if isinstance(chunk, str):
        if not boundary:
            return [chunk]
        # re.escape: boundary strings are literal text, not regex syntax.
        return re.split(re.escape(boundary) + r'(?!--)', chunk)
    return None


# MIME path: `type` here is the module-level string chosen after the header
# was inspected, not the builtin.
if type == "MIME": 
    #separate the text file instead by its boundary identifier 
    p = MIMEParser(separate(text)) 
    p.printAll() 
+0

如果能有一份可以重现错误的完整代码会很有帮助。不过只是为了验证一个猜测,请试着把'for object in chunk:'换成'for obj in chunk:' – 2013-05-10 16:59:11

+0

不应该使用'object'作为变量名称,因为'object'也是一个类型。话虽如此,我不知道它是否有任何影响。 – pcalcao 2013-05-10 17:02:13

+0

@pcalcao这也是我的猜测:如果'str'是一个新式类'str(object)',那么以某种方式遮蔽object可能会破坏字符串的创建......但我刚在本地测试过,结果并非如此。就目前而言,我无法用上面的代码重现错误,主要是因为它只是打印'(pdb)',然后陷入无限递归而停住。 – 2013-05-10 17:05:48

回答

11

你把一个字符串赋值给了名称 type(用来表示电子邮件的类型):

type = "text only" 

然后又把它当作函数来调用:

if type(chunk)... 

于是引发了异常:

*** TypeError: 'str' object is not callable