2014-10-06 50 views


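"Tsunami earthquakes have also been linked to the presence of a thin layer of subducted sedimentary rock along the uppermost part of the plate interface, as is thought to be present in areas of significant topography at the top of the oceanic crust, and where propagation was in an up-dip direction, possibly reaching the seafloor."
(译文:海啸地震还被认为与以下情况有关:板块界面最上部存在一薄层俯冲沉积岩——据认为这种情况出现在洋壳顶部地形起伏显著的区域——以及破裂沿上倾方向传播、并可能到达海底。)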


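(ROOT (S (NP (NN Tsunami) (NNS earthquakes)) (VP (VBP have) (ADVP (RB also)) (VP (VBN been) (VP (VBN linked) (PP (TO to) (NP (NP (DT the) (NN presence)) (PP (IN of) (NP (NP (DT a) (JJ thin) (NN layer)) (PP (IN of) (S (VP (VBN subducted) (NP (NP (JJ sedimentary) (NN rock)) (PP (IN along) (NP (NP (NP (DT the) (JJS uppermost) (NN part)) (PP (IN of) (NP (DT the) (NN plate) (NN interface)))) (, ,) (UCP (RB as) (S (VP (VBZ is) (VP (VBN thought) (S (VP (TO to) (VP (VB be) (ADJP (JJ present) (PP (IN in) (NP (NP (NNS areas)) (PP (IN of) (NP (JJ significant) (NN topography)))))) (PP (IN at) (NP (NP (DT the) (NN top)) (PP (IN of) (NP (DT the) (JJ oceanic) (NN crust))))))))))) (, ,) (CC and) (SBAR (WHADVP (WRB where)) (S (NP (NN propagation)) (VP (VBD was) (PP (IN in) (NP (DT an) (JJ up-dip) (NN direction))) (, ,) (ADVP (RB possibly))))))))) (S (VP (VBG reaching) (NP (DT the) (NN seafloor)))))))))))))) (. .)))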


# Build an nltk Tree from the bracketed parse string shown above.
# The package name is lowercase (`nltk`), and the variable is spelled
# `parsed_tree` to match the name used in the rest of this page.
parsed_tree = nltk.Tree.fromstring(parsetree_string)


Tree('ROOT', [Tree('S', [Tree('NP', [Tree('NN', ['Tsunami']), Tree('NNS', ['earthquakes'])]), Tree('VP', [Tree('VBP', ['have']), Tree('ADVP', [Tree('RB', ['also'])]), Tree('VP', [Tree('VBN', ['been']), Tree('VP', [Tree('VBN', ['linked']), Tree('PP', [Tree('TO', ['to']), Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['presence'])]), Tree('PP', [Tree('IN', ['of']), Tree('NP', [Tree('NP', [Tree('DT', ['a']), Tree('JJ', ['thin']), Tree('NN', ['layer'])]), Tree('PP', [Tree('IN', ['of']), Tree('S', [Tree('VP', [Tree('VBN', ['subducted']), Tree('NP', [Tree('NP', [Tree('JJ', ['sedimentary']), Tree('NN', ['rock'])]), Tree('PP', [Tree('IN', ['along']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJS', ['uppermost']), Tree('NN', ['part'])]), Tree('PP', [Tree('IN', ['of']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['plate']), Tree('NN', ['interface'])])])]), Tree(',', [',']), Tree('UCP', [Tree('RB', ['as']), Tree('S', [Tree('VP', [Tree('VBZ', ['is']), Tree('VP', [Tree('VBN', ['thought']), Tree('S', [Tree('VP', [Tree('TO', ['to']), Tree('VP', [Tree('VB', ['be']), Tree('ADJP', [Tree('JJ', ['present']), Tree('PP', [Tree('IN', ['in']), Tree('NP', [Tree('NP', [Tree('NNS', ['areas'])]), Tree('PP', [Tree('IN', ['of']), Tree('NP', [Tree('JJ', ['significant']), Tree('NN', ['topography'])])])])])]), Tree('PP', [Tree('IN', ['at']), Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['top'])]), Tree('PP', [Tree('IN', ['of']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['oceanic']), Tree('NN', ['crust'])])])])])])])])])])]), Tree(',', [',']), Tree('CC', ['and']), Tree('SBAR', [Tree('WHADVP', [Tree('WRB', ['where'])]), Tree('S', [Tree('NP', [Tree('NN', ['propagation'])]), Tree('VP', [Tree('VBD', ['was']), Tree('PP', [Tree('IN', ['in']), Tree('NP', [Tree('DT', ['an']), Tree('JJ', ['up-dip']), Tree('NN', ['direction'])])]), Tree(',', [',']), Tree('ADVP', [Tree('RB', ['possibly'])])])])])])])])]), Tree('S', [Tree('VP', [Tree('VBG', ['reaching']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['seafloor'])])])])])])])])])])])])])]), Tree('.', ['.'])])])

我的问题是,给定 parsed_tree,我可以得到像 "top of the oceanic crust"、"a thin layer" 这样的左边的实体吗?







>>> from nltk import Tree 
>>> parsed_tree = Tree('ROOT', [Tree('S', [Tree('NP', [Tree('NN', ['Tsunami']), Tree('NNS', ['earthquakes'])]), Tree('VP', [Tree('VBP', ['have']), Tree('ADVP', [Tree('RB', ['also'])]), Tree('VP', [Tree('VBN', ['been']), Tree('VP', [Tree('VBN', ['linked']), Tree('PP', [Tree('TO', ['to']), Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['presence'])]), Tree('PP', [Tree('IN', ['of']), Tree('NP', [Tree('NP', [Tree('DT', ['a']), Tree('JJ', ['thin']), Tree('NN', ['layer'])]), Tree('PP', [Tree('IN', ['of']), Tree('S', [Tree('VP', [Tree('VBN', ['subducted']), Tree('NP', [Tree('NP', [Tree('JJ', ['sedimentary']), Tree('NN', ['rock'])]), Tree('PP', [Tree('IN', ['along']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJS', ['uppermost']), Tree('NN', ['part'])]), Tree('PP', [Tree('IN', ['of']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['plate']), Tree('NN', ['interface'])])])]), Tree(',', [',']), Tree('UCP', [Tree('RB', ['as']), Tree('S', [Tree('VP', [Tree('VBZ', ['is']), Tree('VP', [Tree('VBN', ['thought']), Tree('S', [Tree('VP', [Tree('TO', ['to']), Tree('VP', [Tree('VB', ['be']), Tree('ADJP', [Tree('JJ', ['present']), Tree('PP', [Tree('IN', ['in']), Tree('NP', [Tree('NP', [Tree('NNS', ['areas'])]), Tree('PP', [Tree('IN', ['of']), Tree('NP', [Tree('JJ', ['significant']), Tree('NN', ['topography'])])])])])]), Tree('PP', [Tree('IN', ['at']), Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['top'])]), Tree('PP', [Tree('IN', ['of']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['oceanic']), Tree('NN', ['crust'])])])])])])])])])])]), Tree(',', [',']), Tree('CC', ['and']), Tree('SBAR', [Tree('WHADVP', [Tree('WRB', ['where'])]), Tree('S', [Tree('NP', [Tree('NN', ['propagation'])]), Tree('VP', [Tree('VBD', ['was']), Tree('PP', [Tree('IN', ['in']), Tree('NP', [Tree('DT', ['an']), Tree('JJ', ['up-dip']), Tree('NN', ['direction'])])]), Tree(',', [',']), Tree('ADVP', [Tree('RB', ['possibly'])])])])])])])])]), Tree('S', [Tree('VP', [Tree('VBG', 
['reaching']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['seafloor'])])])])])])])])])])])])])]), Tree('.', ['.'])])]) 

>>> np = [" ".join(i.leaves()) for i in parsed_tree.subtrees() if i.label() == 'NP'] 
>>> np 
['Tsunami earthquakes', 'the presence of a thin layer of subducted sedimentary rock along the uppermost part of the plate interface , as is thought to be present in areas of significant topography at the top of the oceanic crust , and where propagation was in an up-dip direction , possibly reaching the seafloor', 'the presence', 'a thin layer of subducted sedimentary rock along the uppermost part of the plate interface , as is thought to be present in areas of significant topography at the top of the oceanic crust , and where propagation was in an up-dip direction , possibly reaching the seafloor', 'a thin layer', 'sedimentary rock along the uppermost part of the plate interface , as is thought to be present in areas of significant topography at the top of the oceanic crust , and where propagation was in an up-dip direction , possibly', 'sedimentary rock', 'the uppermost part of the plate interface , as is thought to be present in areas of significant topography at the top of the oceanic crust , and where propagation was in an up-dip direction , possibly', 'the uppermost part of the plate interface', 'the uppermost part', 'the plate interface', 'areas of significant topography', 'areas', 'significant topography', 'the top of the oceanic crust', 'the top', 'the oceanic crust', 'propagation', 'an up-dip direction', 'the seafloor'] 


>>> np_mwe 
['Tsunami earthquakes', 'the presence of a thin layer of subducted sedimentary rock along the uppermost part of the plate interface , as is thought to be present in areas of significant topography at the top of the oceanic crust , and where propagation was in an up-dip direction , possibly reaching the seafloor', 'the presence', 'a thin layer of subducted sedimentary rock along the uppermost part of the plate interface , as is thought to be present in areas of significant topography at the top of the oceanic crust , and where propagation was in an up-dip direction , possibly reaching the seafloor', 'a thin layer', 'sedimentary rock along the uppermost part of the plate interface , as is thought to be present in areas of significant topography at the top of the oceanic crust , and where propagation was in an up-dip direction , possibly', 'sedimentary rock', 'the uppermost part of the plate interface , as is thought to be present in areas of significant topography at the top of the oceanic crust , and where propagation was in an up-dip direction , possibly', 'the uppermost part of the plate interface', 'the uppermost part', 'the plate interface', 'areas of significant topography', 'significant topography', 'the top of the oceanic crust', 'the top', 'the oceanic crust', 'an up-dip direction', 'the seafloor'] 


>>> np_mwe_nocomma = [j for j in [" ".join(i.leaves()) for i in parsed_tree.subtrees() if i.label() == 'NP'] if j.count(' ') > 0 and j.count(',') == 0] 
>>> np_mwe_nocomma 
['Tsunami earthquakes', 'the presence', 'a thin layer', 'sedimentary rock', 'the uppermost part of the plate interface', 'the uppermost part', 'the plate interface', 'areas of significant topography', 'significant topography', 'the top of the oceanic crust', 'the top', 'the oceanic crust', 'an up-dip direction', 'the seafloor'] 


>> x = [] 
>>> for i in sorted(np_mwe_nocomma, key=len, reverse=True): 
...  for j in x: 
...    if i in j: 
...      continue 
...  print i 
...  x.append(i) 
the uppermost part of the plate interface 
areas of significant topography 
the top of the oceanic crust 
significant topography 
Tsunami earthquakes 
the plate interface 
an up-dip direction 
the uppermost part 
the oceanic crust 
sedimentary rock 
the presence 
a thin layer 
the seafloor 



谢谢,您的答案确实有帮助并且非常鼓舞人心。 – Sean 2014-10-08 06:30:51