2013-05-14 34 views
0

我试图从 UniProt XML 文件中选择一些数据,大部分想要的内容都已经能取到,但当同一个节点名称下有多个条目时,我无法正确输出这些数据。最好能把它们合并在一起。(标题:选择具有相同节点名称的数据并合并来自XML文件的数据)

XML代码:

<?xml version='1.0' encoding='UTF-8'?>
<?xml-stylesheet href="test_will7.xslt" type="text/xsl" ?>
<!-- Trimmed sample of three UniProt entries used as input for the transform. -->
<uniprot>

  <entry dataset="Swiss-Prot" created="1993-04-01" modified="2012-11-28" version="118">
    <accession>P30443</accession>
    <accession>O77964</accession>
    <name>1A01_HUMAN</name>
    <protein>
      <recommendedName>
        <fullName>HLA class I histocompatibility antigen, A-1 alpha chain</fullName>
      </recommendedName>
    </protein>
    <gene>
      <name type="primary">HLA-A</name>
      <name type="synonym">HLAA</name>
    </gene>
    <comment type="subcellular location">
      <subcellularLocation>
        <location>Membrane</location>
        <topology>Single-pass type I membrane protein</topology>
      </subcellularLocation>
    </comment>
    <dbReference type="GO" id="GO:0031901">
      <property type="term" value="C:early endosome membrane"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
    <dbReference type="GO" id="GO:0012507">
      <property type="term" value="C:ER to Golgi transport vesicle membrane"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
    <dbReference type="GO" id="GO:0000139">
      <property type="term" value="C:Golgi membrane"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
  </entry>

  <entry dataset="Swiss-Prot" created="1986-07-21" modified="2012-11-28" version="151">
    <accession>P01892</accession>
    <accession>O19619</accession>
    <accession>P06338</accession>
    <name>1A02_HUMAN</name>
    <protein>
      <recommendedName>
        <fullName>HLA class I histocompatibility antigen, A-2 alpha chain</fullName>
      </recommendedName>
    </protein>
    <gene>
      <name type="primary">HLA-A</name>
      <name type="synonym">HLAA</name>
    </gene>
    <comment type="subcellular location">
      <subcellularLocation>
        <location>Membrane</location>
        <topology>Single-pass type I membrane protein</topology>
      </subcellularLocation>
    </comment>
    <dbReference type="GO" id="GO:0060333">
      <property type="term" value="P:interferon-gamma-mediated signaling pathway"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
  </entry>

  <entry dataset="Swiss-Prot" created="1987-08-13" modified="2012-11-28" version="124">
    <accession>P04439</accession>
    <name>1A03_HUMAN</name>
    <protein>
      <recommendedName>
        <fullName>HLA class I histocompatibility antigen, A-3 alpha chain</fullName>
      </recommendedName>
    </protein>
    <gene>
      <name type="primary">HLA-A</name>
      <name type="synonym">HLAA</name>
    </gene>
    <comment type="subcellular location">
      <subcellularLocation>
        <location>Membrane</location>
        <topology>Single-pass type I membrane protein</topology>
      </subcellularLocation>
    </comment>
    <dbReference type="GO" id="GO:0005887">
      <property type="term" value="C:integral to plasma membrane"/>
      <property type="evidence" value="NAS:UniProtKB"/>
    </dbReference>
    <dbReference type="GO" id="GO:0019048">
      <property type="term" value="P:virus-host interaction"/>
      <property type="evidence" value="IEA:UniProtKB-KW"/>
    </dbReference>
  </entry>
</uniprot>

我的XSLT文件现在是这样的。但我肯定还有哪里做错了,因为它不起作用。也许是因为这些节点处于不同的层级?

<?xml version="1.0" ?> 
<!--
  Attempt from the question: render each uniprot/entry as one HTML table row
  and merge repeated GO values into a single cell.
  NOTE(review): as posted, the xsl:when tests in the last template are not
  valid XPath (see the note there), so the stylesheet cannot run.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> 

    <!-- Document root: writes the page skeleton and the header row, then
         applies templates to the children to produce the data rows. -->
    <xsl:template match="/"> 
    <html> 
     <body> 
     <h2>My Selection</h2> 
     <table border="1"> 
      <tr bgcolor="#9acd32"> 
      <th>Name</th> 
      <th>GeneName</th> 
      <th>AccessionNr</th> 
      <th>ProteinName</th> 
      <th>SubcellularLocation</th> 
      <th>TissueSpecificity</th> 
      <th>GOID</th> 
      <th>GOName</th> 
      </tr> 
      <xsl:apply-templates/> 
     </table> 
     </body> 
    </html> 
    </xsl:template> 

    <!-- One row per entry. The union selects every node that should become a
         cell; the selected nodes are processed in document order, not in the
         order they are written in the select expression.
         NOTE(review): 'tissue specificty' looks like a typo for
         'tissue specificity'; confirm against the real data. -->
    <xsl:template match="uniprot/entry"> 
    <tr> 
     <xsl:apply-templates select="name|gene/name|accession|protein/recommendedName/fullName|comment[@type = 'subcellular location']/subcellularLocation/location|comment[@type = 'tissue specificty']/text|dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value"/> 
    </tr> 
    </xsl:template> 

     <!-- Cell template for every node selected above. The xsl:choose is meant
          to join all GO values of an entry into the first GO cell and to
          suppress the later ones.
          NOTE(review): the string literal after name()= contains unescaped
          single quotes, so it ends at the first inner quote and the test is
          not a valid XPath expression. In addition, name() returns only a
          node name (here 'id' or 'value'), never a path, and attribute nodes
          have no preceding-sibling or following-sibling nodes, so these
          tests could not succeed even with the quoting fixed. -->
     <xsl:template match="name|gene/name|accession|protein/recommendedName/fullName|comment[@type = 'subcellular location']/subcellularLocation/location|comment[@type = 'tissue specificty']/text|dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value"> 
    <xsl:choose> 
     <!-- Intended case: first GO value of the entry; emit it followed by all
          later GO values, separated by ';'. -->
     <xsl:when test="name()='dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value' and not(preceding-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value)"> 
     <td> 
      <xsl:value-of select="."/> 
      <xsl:if test="following-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value"> 
      <xsl:text>;</xsl:text> 
      <xsl:for-each select="following-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value"> 
       <xsl:value-of select="."/> 
       <xsl:if test="position()!=last()"> 
       <xsl:text>;</xsl:text> 
       </xsl:if> 
      </xsl:for-each> 
      </xsl:if> 
     </td> 
     </xsl:when> 
     <!-- Intended case: a later GO value; emit nothing because the first one
          already printed it. -->
     <xsl:when test="name()='dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value' and preceding-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value"/> 
     <!-- Every other selected node becomes its own cell. -->
     <xsl:otherwise> 
     <td> 
      <xsl:value-of select="."/> 
     </td> 
     </xsl:otherwise> 
    </xsl:choose> 
    </xsl:template> 

</xsl:stylesheet> 

我想要的输出:

Name GeneName AccessionNr ProteinName SubcellularLocation GOID_ GOName 
1A01_HUMAN HLA-A P30443 HLA class I histocompatibility antigen, A-1 alpha chain Membrane GO:0031901- C:early endosome membrane; GO:0012507- C:ER to Golgi transport vesicle membrane; GO:0000139- C:Golgi membrane 
1A02_HUMAN HLA-A P01892 HLA class I histocompatibility antigen, A-2 alpha chain Membrane GO:0060333-P:interferon-gamma-mediated signaling pathway 
1A03_HUMAN HLA-A P04439 HLA class I histocompatibility antigen, A-3 alpha chain Membrane GO:0005887- C:integral to plasma membrane; GO:0019048- P:virus-host interaction 

如果这太困难,也可以是这样的:

Name GeneName AccessionNr ProteinName SubcellularLocation GOID GOName 
1A01_HUMAN HLA-A P30443 HLA class I histocompatibility antigen, A-1 alpha chain Membrane GO:0031901; GO:0012507; GO:0000139 C:early endosome membrane; C:ER to Golgi transport vesicle membrane; C:Golgi membrane 
1A02_HUMAN HLA-A P01892 HLA class I histocompatibility antigen, A-2 alpha chain Membrane GO:0060333 P:interferon-gamma-mediated signaling pathway 
1A03_HUMAN HLA-A P04439 HLA class I histocompatibility antigen, A-3 alpha chain Membrane GO:0005887; GO:0019048 C:integral to plasma membrane; P:virus-host interaction 

我知道内容很多,而且要把所有东西都区分清楚相当困难。我能读懂代码,但修复错误或编写新内容对我来说仍然非常困难!(而且我是XML新手)谢谢!

+0

请提供正确的XML文件。您的XML中的 'organism' 元素没有任何结束标记。 – 2013-05-14 10:07:13

+0

复制/粘贴错误,谢谢。我纠正了它。 – user1941884 2013-05-15 00:40:48

回答

1

我从XML输入中删除了 'organism' 元素,并编写了新的XSLT以得到所需的输出:

XSLT:

<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Renders every uniprot/entry as one row of an HTML table.
  All GO cross-references of an entry are merged into a single cell as
  "id- term; " pairs.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <xsl:template match="uniprot">
    <html>
      <body>
        <h2>My Selection</h2>
        <table border="1">
          <!-- NOTE(review): the header row has eight cells, but each data row
               below emits seven, because GOID and GOName share one cell. -->
          <tr bgcolor="#9acd32">
            <th>Name</th>
            <th>GeneName</th>
            <th>AccessionNr</th>
            <th>ProteinName</th>
            <th>SubcellularLocation</th>
            <th>TissueSpecificity</th>
            <th>GOID</th>
            <th>GOName</th>
          </tr>
          <xsl:for-each select="entry">
            <tr>
              <!-- In XSLT 1.0, xsl:value-of outputs only the first selected
                   node in document order, so entries with several accession
                   or gene/name elements show just the first one. -->
              <td><xsl:value-of select="name"/></td>
              <td><xsl:value-of select="gene/name"/></td>
              <td><xsl:value-of select="accession"/></td>
              <td><xsl:value-of select="protein/recommendedName/fullName"/></td>
              <td><xsl:value-of select="comment[@type = 'subcellular location']/subcellularLocation/location"/></td>
              <!-- Spelling fixed from 'tissue specificty', which could never
                   match a correctly typed comment. The cell stays empty for
                   entries that have no such comment. -->
              <td><xsl:value-of select="comment[@type = 'tissue specificity']"/></td>
              <td>
                <!-- One "id- term; " pair per GO reference; the separator is
                     also written after the last pair. -->
                <xsl:for-each select="dbReference[@type = 'GO']">
                  <xsl:value-of select="concat(@id,'- ',property[@type = 'term']/@value,'; ')"/>
                </xsl:for-each>
              </td>
            </tr>
          </xsl:for-each>
        </table>
      </body>
    </html>
  </xsl:template>
</xsl:stylesheet>

OUTPUT:

<html> 
    <body> 
     <h2>My Selection</h2> 
     <table border="1"> 
     <tr bgcolor="#9acd32"> 
      <th>Name</th> 
      <th>GeneName</th> 
      <th>AccessionNr</th> 
      <th>ProteinName</th> 
      <th>SubcellularLocation</th> 
      <th>TissueSpecificity</th> 
      <th>GOID</th> 
      <th>GOName</th> 
     </tr> 
     <tr> 
      <td>1A01_HUMAN</td> 
      <td>HLA-A</td> 
      <td>P30443</td> 
      <td>HLA class I histocompatibility antigen, A-1 alpha chain</td> 
      <td>Membrane</td> 
      <td></td> 
      <td>GO:0031901- C:early endosome membrane; GO:0012507- C:ER to Golgi transport vesicle membrane; GO:0000139- C:Golgi membrane; </td> 
     </tr> 
     <tr> 
      <td>1A02_HUMAN</td> 
      <td>HLA-A</td> 
      <td>P01892</td> 
      <td>HLA class I histocompatibility antigen, A-2 alpha chain</td> 
      <td>Membrane</td> 
      <td></td> 
      <td>GO:0060333- P:interferon-gamma-mediated signaling pathway; </td> 
     </tr> 
     <tr> 
      <td>1A03_HUMAN</td> 
      <td>HLA-A</td> 
      <td>P04439</td> 
      <td>HLA class I histocompatibility antigen, A-3 alpha chain</td> 
      <td>Membrane</td> 
      <td></td> 
      <td>GO:0005887- C:integral to plasma membrane; GO:0019048- P:virus-host interaction; </td> 
     </tr> 
     </table> 
    </body> 
</html> 
+0

谢谢!你是一个救星! – user1941884 2013-05-15 00:49:30

相关问题