2017-08-10 66 views
1

标题:在 Java 中解析一个 XML 文件的数据并写入新的 XML 文件。我有一个 XML 文件,其中有段落(paragraph)元素、句子(sentence)元素,以及位于句子之下的注释子元素。我想读取这些注释元素、提取其内容,并按如下格式写入一个新的 XML 文件:

<sentence> 
     <Date></Date> 
     <Person></Person> 
     <NumberDate></NumberDate> 
     <Location></Location> 
     <etc></etc> 
    </sentence> 

在我的代码中,我解析了 XML 文件并读取了注释元素,但目前只能把结果打印到控制台。我不知道接下来该怎么做,也不知道如何把结果导出到一个新的 XML 文件。

这里是我的代码:

 package domparserxml; 
     import java.io.File; 
     //package domparserxml; 
     import java.io.IOException; 
     import java.io.PrintStream; 
     import javax.xml.parsers.DocumentBuilder; 
     import javax.xml.parsers.DocumentBuilderFactory; 
     import javax.xml.parsers.ParserConfigurationException; 

     import org.w3c.dom.Document; 
     import org.w3c.dom.Element; 
     import org.w3c.dom.Node; 
     import org.w3c.dom.NodeList; 
     import org.xml.sax.SAXException; 

     /**
      * Reads {@code Chrono.xml} and prints, for every sentence element found
      * directly under a {@code paragraph} element, one line containing the
      * paragraph id, paragraph date, sentence id and each annotation as
      * {@code tag:text;}.
      */
     public class DomParserXml {

         /**
          * Parses {@code Chrono.xml} (resolved against the current working
          * directory) and writes the extracted annotations to standard output.
          *
          * @param args ignored
          */
         public static void main(String[] args) {
             DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

             try {
                 DocumentBuilder builder = factory.newDocumentBuilder();
                 Document doc = builder.parse("Chrono.xml"); // This is my input xml file
                 NodeList paragraphList = doc.getElementsByTagName("paragraph");
                 for (int i = 0; i < paragraphList.getLength(); i++) {
                     Node p = paragraphList.item(i);
                     if (p.getNodeType() != Node.ELEMENT_NODE) {
                         continue;
                     }
                     Element paragraph = (Element) p;
                     // Child nodes include whitespace text nodes, so keep only elements.
                     NodeList sentenceList = paragraph.getChildNodes();
                     for (int j = 0; j < sentenceList.getLength(); j++) {
                         Node s = sentenceList.item(j);
                         if (s.getNodeType() == Node.ELEMENT_NODE) {
                             printSentence(paragraph, (Element) s);
                         }
                     }
                 }
             } catch (ParserConfigurationException e) {
                 e.printStackTrace();
             } catch (SAXException e) {
                 e.printStackTrace();
             } catch (IOException e) {
                 e.printStackTrace();
             }
         }

         /**
          * Prints one sentence: a line break, then the paragraph/sentence header
          * once, followed by every annotation child element as {@code tag:text;}.
          */
         private static void printSentence(Element paragraph, Element sentence) {
             System.out.println(""); // separates sentences in the output

             NodeList annotationList = sentence.getChildNodes();
             int len = annotationList.getLength();
             // Track the first annotation element explicitly. Relying on its child
             // index only works when a whitespace text node precedes it; with
             // compact XML the element at index 0 would otherwise be dropped.
             boolean firstAnnotation = true;
             for (int a = 0; a < len; a++) {
                 Node anno = annotationList.item(a);
                 if (anno.getNodeType() != Node.ELEMENT_NODE) {
                     continue;
                 }
                 Element annotation = (Element) anno;
                 if (firstAnnotation) {
                     // The header is written once per sentence, before its first annotation.
                     System.out.print("paragraph-id:" + paragraph.getAttribute("id") + ";"
                             + "paragraph-date:" + paragraph.getAttribute("date") + ";"
                             + "senteid:" + sentence.getAttribute("id") + ";");
                     firstAnnotation = false;
                 }
                 System.out.print(annotation.getTagName() + ":" + annotation.getTextContent() + ";");
             }
         }
     }

有人可以帮帮我吗?

谢谢。

回答

2

DOM 提供了许多方便的类,可以轻松创建 XML 文件。首先,使用 DocumentBuilder 类创建一个 Document,然后用 Element 类定义全部 XML 内容(节点和属性)。最后,使用 Transformer 类将整个 XML 内容输出到流,通常是一个文件(File)。

请看下面的代码;当 paragraph 变量中已经取得所有值之后,就可以紧接着使用这段代码:

package com.sujit; 

import java.io.File; 

import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
import javax.xml.parsers.ParserConfigurationException; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerConfigurationException; 
import javax.xml.transform.TransformerException; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.dom.DOMSource; 
import javax.xml.transform.stream.StreamResult; 

import org.w3c.dom.Document; 
import org.w3c.dom.Element; 

/**
 * Builds a small XML document with a {@code sentence} root and four text
 * children, then serializes it to {@code E://file.xml}.
 *
 * NOTE(review): {@code paragraph} is not declared anywhere in this class; the
 * snippet presumably expects to be placed where such a variable is in scope.
 */
public class CreateXML {

    /**
     * Creates the document in memory and writes it to disk, printing
     * {@code File saved!} on success and a stack trace on failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();

            // The root element of the output document.
            Document xml = builder.newDocument();
            Element sentence = xml.createElement("sentence");
            xml.appendChild(sentence);

            // One child per value read from the paragraph's attributes.
            addTextChild(xml, sentence, "date", paragraph.getAttribute("date"));
            addTextChild(xml, sentence, "person", paragraph.getAttribute("person"));
            addTextChild(xml, sentence, "numberdate", paragraph.getAttribute("numberDate"));
            addTextChild(xml, sentence, "location", paragraph.getAttribute("location"));

            // Serialize the DOM tree to the target file.
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.transform(new DOMSource(xml), new StreamResult(new File("E://file.xml")));

            System.out.println("File saved!");
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (TransformerConfigurationException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            e.printStackTrace();
        }
    }

    /** Appends a new {@code tag} element holding {@code text} to {@code parent}. */
    private static void addTextChild(Document owner, Element parent, String tag, String text) {
        Element child = owner.createElement(tag);
        child.appendChild(owner.createTextNode(text));
        parent.appendChild(child);
    }

}

如果仍然遇到任何问题,请告诉我。

+0

@sForSujit 是的……非常感谢,这应该能得到我想要的结果,但它只能保存一个句子。我在这段代码之前写了一个 if 循环来遍历各个段落,但结果没有任何变化。 –

+0

您是否将所有句子存储在集合中? – sForSujit

+0

是的,我有一个很大的 XML 文件,里面有多个句子……所以对于每个句子,我想取出其子节点及内容(日期、人物、地点等),并按上面的格式输出。 –